This notebook estimates the indicators based on the raw data and
perfomrs the main analyses and figures used in the manuscript of the
multicountry paper. The input is the “clean kobo output” that was first
cleaned by 1.2_cleaning.
Packages and functions
Load required libraries:
library(tidyr)
library(dplyr)
library(readr)
library(utile.tools)
library(stringr)
library(ggplot2)
library(ggsankey)
library(alluvial)
library(viridis)
library(cowplot)
library(lme4)
library(knitr)
library(glmmTMB)
Load required functions. These custom fuctions are available at: https://github.com/AliciaMstt/GeneticIndicators
source("get_indicator1_data.R")
source("get_indicator2_data.R")
source("get_indicator3_data.R")
source("get_metadata.R")
source("transform_to_Ne.R")
source("estimate_indicator1.R")
Other custom functions:
### not in
'%!in%' <- function(x,y)!('%in%'(x,y))
#' Duplicates data to create additional facet. Thanks to https://stackoverflow.com/questions/18933575/easily-add-an-all-facet-to-facet-wrap-in-ggplot2
#' @param df a dataframe
#' @param col the name of facet column
#'
CreateAllFacet <- function(df, col){
df$facet <- df[[col]]
temp <- df
temp$facet <- "all"
merged <-rbind(temp, df)
# ensure the facet value is a factor
merged[[col]] <- as.factor(merged[[col]])
return(merged)
}
Custom colors:
## IUCN official colors
# Assuming order of levels is: "re", "cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown" (for regional, and w/o "re" for global). Make sure to change the levels to that order before plotting.
IUCNcolors<-c("brown2", "darkorange", "yellow", "green", "darkgreen", "darkgrey", "azure2", "bisque1")
IUCNcolors_regional<-c("darkorchid2", "brown2", "darkorange", "yellow", "green", "darkgreen", "darkgrey", "azure2", "bisque1")
## nice soft ramp for taxonomic groups
taxoncolors<-cividis(12) # same than using cividis(length(levels(as.factor(metadata$taxonomic_group))))
## Colors for simplified methods to define populations
# assuming the levels (see how this was created in the section "Simplify combinations of methods to define populations"): of running levels(as.factor(ind2_data$defined_populations_simplified)) (after new order)
# get a set of colors to highlight genetic and geographic with similar colors
simplifiedmethods_colors<-c("#FFA07A", #"dispersal_buffer"
"#7f611b", # "eco_biogeo_proxies"
"#668cd1", # "genetic_clusters"
"#668cd1", # "genetic_clusters eco_biogeo_proxies"
"#45c097", # "genetic_clusters geographic_boundaries"
"#d4b43e", # "geographic_boundaries"
"#d4b43e", # "geographic_boundaries eco_biogeo_proxies"
"#d4b43e", # "geographic_boundaries management_units"
"#b34656", # "management_units"
"#be72c9", # "other"
"#be72c9")# "other_combinations"
grouped_taxon_colors<-c("#9f43c8", "#91c637", "#e5463c")
Get data
Get indicators and metadata data from clean kobo output
# Get data:
kobo_clean<-read.csv(file="kobo_output_clean.csv", header=TRUE)
# Extract indicator 1 data from kobo output, show most relevant columns
ind1_data<-get_indicator1_data(kobo_output=kobo_clean)
## [1] "the data already contained a taxon column, that was used instead of creating a new one"
head(ind1_data[,c(1:3, 12:14)])
# Extract Proportion of maintained populations (indicator) data from kobo output, show most relevant columns
ind2_data<-get_indicator2_data(kobo_output=kobo_clean)
## [1] "the data already contained a taxon column, that was used instead of creating a new one"
head(ind2_data[,c(1:3, 9:10,13)])
# Extract indicator 3 data from kobo output, show most relevant columns
ind3_data<-get_indicator3_data(kobo_output=kobo_clean)
## [1] "the data already contained a taxon column, that was used instead of creating a new one"
head(ind3_data[,c(1:3, 9:11)])
# extract metadata, show most relevant columns
metadata<-get_metadata(kobo_output=kobo_clean)
## [1] "the data already contained a taxon column, that was used instead of creating a new one"
head(metadata[,c(1:3, 12, 25,26, 64)])
Get population data for those species assessed using the tabular text
template instead of Kobo. This file was produced by the script
1.2_cleaning.Rmd
ind1_data_from_templates<-read.csv(file="ind1_data_from_templates.csv")
Add data recorded using the population template to the ind1_data
already in the nice format.
ind1_data<-rbind(ind1_data, ind1_data_from_templates)
Estimate indicators
Indicator 1 (proportion of populations with Ne >500):
Show most relevant columns of indicator 1 data
head(ind1_data[,c(1:3, 12:14)])
Remember what the function to transform NcRange and NcPoint data into
Ne does:
# check what the custom funciton does
transform_to_Ne
## function (ind1_data, ratio = 0.1)
## {
## ratio = ratio
## if (!is.numeric(ratio) || ratio < 0 || ratio > 1) {
## stop("Invalid argument. Please provide a number within the range 0 to 1, using `.` to delimit decimals.")
## }
## else {
## ind1_data = ind1_data
## ind1_data <- ind1_data %>% mutate(Nc_from_range = case_when(NcRange ==
## "more_5000_bymuch" ~ 10000, NcRange == "more_5000" ~
## 5500, NcRange == "less_5000_bymuch" ~ 500, NcRange ==
## "less_5000" ~ 4050, NcRange == "range_includes_5000" ~
## 5001)) %>% mutate(Ne_from_Nc = case_when(!is.na(NcPoint) ~
## NcPoint * ratio, !is.na(Nc_from_range) ~ Nc_from_range *
## ratio)) %>% mutate(Ne_combined = if_else(is.na(Ne),
## Ne_from_Nc, Ne)) %>% mutate(Ne_calculated_from = if_else(is.na(Ne),
## if_else(!is.na(NcPoint), "NcPoint ratio", if_else(!is.na(Nc_from_range),
## "NcRange ratio", NA_character_)), "genetic data"))
## print(ind1_data)
## }
## }
Use function to get Ne data from NcRange or NcPoint data, and their
combination (Ne estimated from Ne if Ne is available, otherwise, from
Nc)
ind1_data<-transform_to_Ne(ind1_data = ind1_data, ratio = 0.1)
## # A tibble: 5,049 × 40
## country_assessme… taxonomic_group taxon scientific_auth… genus year_assesment
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 sweden mammal Alce… (Linnaeus, 1758) Alces 2023
## 2 sweden mammal Alce… (Linnaeus, 1758) Alces 2023
## 3 sweden mammal Alce… (Linnaeus, 1758) Alces 2023
## 4 sweden fish Silu… (Linnaeus, 1758) Silu… 2023
## 5 sweden fish Silu… (Linnaeus, 1758) Silu… 2023
## 6 sweden fish Silu… (Linnaeus, 1758) Silu… 2023
## 7 sweden fish Silu… (Linnaeus, 1758) Silu… 2023
## 8 sweden fish Silu… (Linnaeus, 1758) Silu… 2023
## 9 sweden fish Silu… (Linnaeus, 1758) Silu… 2023
## 10 sweden bird Dend… Bechstein 1803 Dend… 2022
## # … with 5,039 more rows, and 34 more variables: name_assessor <chr>,
## # email_assessor <chr>, kobo_tabular <chr>, defined_populations <chr>,
## # time_populations <chr>, X_validation_status <chr>, X_uuid <chr>,
## # multiassessment <chr>, population <chr>, Name <chr>, Origin <chr>,
## # IntroductionYear <chr>, Ne <dbl>, NeLower <dbl>, NeUpper <dbl>,
## # NeYear <chr>, GeneticMarkers <chr>, GeneticMarkersOther <chr>,
## # MethodNe <chr>, SourceNe <chr>, NcType <chr>, NcYear <chr>, …
Remember what the function to estimate indicator 1 does:
# check what the custom function does
estimate_indicator1
## function (ind1_data)
## {
## indicator1 <- ind1_data %>% group_by(X_uuid, ) %>% summarise(n_pops = n(),
## n_pops_Ne_data = sum(!is.na(Ne_combined)), n_pops_more_500 = sum(Ne_combined >
## 500, na.rm = TRUE), indicator1 = n_pops_more_500/n_pops_Ne_data) %>%
## left_join(metadata)
## print(indicator1)
## }
Now estimate indicator 1 :)
indicator1<-estimate_indicator1(ind1_data = ind1_data)
## Joining, by = "X_uuid"
## # A tibble: 600 × 69
## X_uuid n_pops n_pops_Ne_data n_pops_more_500 indicator1 country_assessm…
## <chr> <int> <int> <int> <dbl> <chr>
## 1 010d85cd-5… 2 1 1 1 united_states
## 2 016d59ae-9… 1 1 0 0 mexico
## 3 017ff4b6-5… 1 0 0 NaN colombia
## 4 019bd95f-b… 1 1 0 0 sweden
## 5 01b10b29-9… 1 1 1 1 south_africa
## 6 0301e6b3-b… 3 3 3 1 france
## 7 036baa83-5… 1 0 0 NaN colombia
## 8 037a15b2-f… 3 2 0 0 colombia
## 9 037d6c8f-7… 4 2 2 1 united_states
## 10 03f03179-1… 1 1 1 1 south_africa
## # … with 590 more rows, and 63 more variables: taxonomic_group <chr>,
## # taxon <chr>, scientific_authority <chr>, genus <chr>, year_assesment <chr>,
## # name_assessor <chr>, email_assessor <chr>, common_name <chr>,
## # kobo_tabular <chr>, X_validation_status <chr>, GBIF_taxonID <int>,
## # NCBI_taxonID <chr>, national_taxonID <chr>, source_national_taxonID <chr>,
## # other_populations <chr>, time_populations <chr>, defined_populations <chr>,
## # source_definition_populations <chr>, map_populations <chr>, …
Proportion of maintained populations (indicator 2) = proportion of
populations within species which are maintained.
Proportion of maintained populations (indicator) is the he proportion
of populations within species which are maintained. This can be
estimated based on the n_extant_populations and
n_extint_populations, as follows:
ind2_data$indicator2<- ind2_data$n_extant_populations / (ind2_data$n_extant_populations + ind2_data$n_extint_populations)
head(ind2_data$indicator2)
## [1] 1.0000000 0.5000000 0.2941176 1.0000000 0.3333333 1.0000000
Number of taxa with genetic monitoring squemes (indicator3)
Indicator 3 refers to the number (count) of taxa by country in which
genetic monitoring is occurring. This is stored in the variable
temp_gen_monitoring as a “yes/no” answer for each taxon, so
to estimate the indicator, we only need to count how many said “yes”,
keeping only one of the records when the taxon was multiassessed:
indicator3<-ind3_data %>%
# keep only one record if the taxon was assessed more than once within the country
select(country_assessment, taxon, temp_gen_monitoring) %>%
filter(!duplicated(.)) %>%
# count "yes" in tem_gen_monitoring by country
filter(temp_gen_monitoring=="yes") %>%
group_by(country_assessment) %>%
summarise(n_taxon_gen_monitoring= n())
Join indicators and metadata in a single table
It could be useful to have the estimated indicator and the metadata
in a single large table.
indicators_full<-left_join(metadata, indicator1) %>%
left_join(ind2_data) %>%
left_join(ind3_data)
## Joining, by = c("country_assessment", "taxonomic_group", "taxon",
## "scientific_authority", "genus", "year_assesment", "name_assessor",
## "email_assessor", "common_name", "kobo_tabular", "X_validation_status",
## "X_uuid", "GBIF_taxonID", "NCBI_taxonID", "national_taxonID",
## "source_national_taxonID", "other_populations", "time_populations",
## "defined_populations", "source_definition_populations", "map_populations",
## "map_populations_URL", "habitat_decline_area", "source_populations",
## "popsize_data", "ne_pops_exists", "nc_pops_exists", "ratio_exists",
## "species_related", "ratio_species_related", "ratio_year",
## "source_popsize_ratios", "species_comments", "realm", "IUCN_habitat",
## "other_habitat", "national_endemic", "transboundary_type", "other_explain",
## "country_proportion", "species_range", "rarity", "occurrence_extent",
## "occurrence_area", "pop_fragmentation_level", "species_range_comments",
## "global_IUCN", "regional_redlist", "other_assessment_status",
## "other_assessment_name", "source_status_distribution", "fecundity",
## "semelparous_offpring", "reproductive_strategy", "reproductive_strategy_other",
## "adult_age_data", "other_reproductive_strategy", "longevity_max",
## "longevity_median", "longevity_maturity", "longevity_age",
## "life_history_based_on", "life_history_sp_basedon", "sources_life_history",
## "multiassessment")
## Joining, by = c("country_assessment", "taxonomic_group", "taxon",
## "scientific_authority", "genus", "year_assesment", "name_assessor",
## "email_assessor", "X_validation_status", "X_uuid", "other_populations",
## "time_populations", "defined_populations", "source_definition_populations",
## "map_populations", "map_populations_URL", "habitat_decline_area",
## "source_populations", "multiassessment")
## Joining, by = c("country_assessment", "taxonomic_group", "taxon",
## "scientific_authority", "genus", "year_assesment", "name_assessor",
## "email_assessor", "X_validation_status", "X_uuid", "multiassessment")
Save indicators data
Save indicators data and metadata to csv files, useful for analyses
outside R.
# save processed data
write.csv(ind1_data, "ind1_data.csv", row.names = FALSE)
write.csv(indicators_full, "indicators_full.csv", row.names = FALSE)
write.csv(ind2_data, "ind2_data.csv", row.names = FALSE)
write.csv(ind3_data, "ind3_data.csv", row.names = FALSE)
write.csv(metadata, "metadata.csv", row.names = FALSE)
Change country name to nicer labels
To have nice levels in the plots we will change the way country names
are written:
# make factor
metadata$country_assessment<-as.factor(metadata$country_assessment)
indicators_full$country_assessment<-as.factor(indicators_full$country_assessment)
ind2_data$country_assessment<-as.factor(ind2_data$country_assessment)
ind1_data$country_assessment<-as.factor(ind1_data$country_assessment)
indicator1$country_assessment<-as.factor(indicator1$country_assessment)
# original levels
levels(metadata$country_assessment)
## [1] "australia" "belgium" "colombia" "france"
## [5] "japan" "mexico" "south_africa" "sweden"
## [9] "united_states"
# change
levels(metadata$country_assessment)<-c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US")
levels(indicators_full$country_assessment)<-c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US")
levels(ind1_data$country_assessment)<-c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US")
levels(ind2_data$country_assessment)<-c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US")
levels(indicator1$country_assessment)<-c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US")
Simplify combinations of methods to define populations
The methods used to define populations come from a check box question
were one or more of the following categories can be selected:
genetic_clusters, geographic_boundaries, eco_biogeo_proxies,
adaptive_traits, management_units, other. As a consequence any
combination of the former can be possible. Leading to the following
frequency table:
table(indicators_full$defined_populations)
##
## adaptive_traits
## 5
## adaptive_traits management_units
## 1
## dispersal_buffer
## 159
## dispersal_buffer adaptive_traits
## 2
## dispersal_buffer eco_biogeo_proxies
## 1
## dispersal_buffer other
## 1
## eco_biogeo_proxies
## 44
## eco_biogeo_proxies adaptive_traits
## 3
## eco_biogeo_proxies dispersal_buffer
## 7
## eco_biogeo_proxies management_units
## 3
## eco_biogeo_proxies other
## 2
## genetic_clusters
## 108
## genetic_clusters adaptive_traits
## 7
## genetic_clusters dispersal_buffer
## 11
## genetic_clusters eco_biogeo_proxies
## 26
## genetic_clusters eco_biogeo_proxies adaptive_traits
## 3
## genetic_clusters eco_biogeo_proxies adaptive_traits management_units
## 2
## genetic_clusters eco_biogeo_proxies management_units
## 1
## genetic_clusters geographic_boundaries
## 69
## genetic_clusters geographic_boundaries adaptive_traits
## 5
## genetic_clusters geographic_boundaries eco_biogeo_proxies
## 8
## genetic_clusters geographic_boundaries eco_biogeo_proxies adaptive_traits
## 1
## genetic_clusters geographic_boundaries eco_biogeo_proxies adaptive_traits management_units
## 1
## genetic_clusters geographic_boundaries eco_biogeo_proxies management_units
## 1
## genetic_clusters geographic_boundaries management_units
## 8
## genetic_clusters management_units
## 5
## genetic_clusters other
## 2
## geographic_boundaries
## 274
## geographic_boundaries adaptive_traits
## 12
## geographic_boundaries adaptive_traits management_units other
## 1
## geographic_boundaries dispersal_buffer
## 1
## geographic_boundaries eco_biogeo_proxies
## 105
## geographic_boundaries eco_biogeo_proxies adaptive_traits
## 3
## geographic_boundaries eco_biogeo_proxies management_units
## 3
## geographic_boundaries eco_biogeo_proxies other
## 2
## geographic_boundaries management_units
## 24
## geographic_boundaries other
## 12
## management_units
## 29
## management_units other
## 1
## other
## 19
It is hard to group the above methods, so we will keep the original
groups with n >=19 in the above list, and tag the combinations that
appear few times as as “other_combinations”.
Which groups have n>=19?
x<-as.data.frame(table(indicators_full$defined_populations)[table(indicators_full$defined_populations) >= 19])
colnames(x)[1]<-"method"
x
We can add this new column to the metadata and indicator data:
### for indicators
indicators_full<- indicators_full %>%
mutate(defined_populations_simplified = case_when(
# if the method is in the list of methods n>=19 then keep it
defined_populations %in% x$method ~ defined_populations,
TRUE ~ "other_combinations"))
### for meta
metadata<- metadata %>%
mutate(defined_populations_simplified = case_when(
# if the method is in the list of methods n>=19 then keep it
defined_populations %in% x$method ~ defined_populations,
TRUE ~ "other_combinations"))
### for ind1 raw data
ind1_data<- ind1_data %>%
mutate(defined_populations_simplified = case_when(
# if the method is in the list of methods n>=19 then keep it
defined_populations %in% x$method ~ defined_populations,
TRUE ~ "other_combinations"))
Check n for simplified methods:
table(indicators_full$defined_populations_simplified)
##
## dispersal_buffer
## 159
## eco_biogeo_proxies
## 44
## genetic_clusters
## 108
## genetic_clusters eco_biogeo_proxies
## 26
## genetic_clusters geographic_boundaries
## 69
## geographic_boundaries
## 274
## geographic_boundaries eco_biogeo_proxies
## 105
## geographic_boundaries management_units
## 24
## management_units
## 29
## other
## 19
## other_combinations
## 115
Table of equivalences:
indicators_full %>%
select(defined_populations, defined_populations_simplified) %>%
filter(!duplicated(defined_populations))
Create nicer names for ploting
# original method names
levels(as.factor(indicators_full$defined_populations_simplified))
## [1] "dispersal_buffer"
## [2] "eco_biogeo_proxies"
## [3] "genetic_clusters"
## [4] "genetic_clusters eco_biogeo_proxies"
## [5] "genetic_clusters geographic_boundaries"
## [6] "geographic_boundaries"
## [7] "geographic_boundaries eco_biogeo_proxies"
## [8] "geographic_boundaries management_units"
## [9] "management_units"
## [10] "other"
## [11] "other_combinations"
# nicer names
nice_names <- c("dispersal buffer",
"eco- biogeographic proxies",
"genetic clusters",
"genetic clusters & eco- biogeographic proxies",
"genetic clusters & geographic boundaries",
"geographic boundaries",
"geographic boundaries & eco- biogeographic proxies",
"geographic boundaries & management units",
"management units",
"other",
"other combinations")
### add them
indicators_full$defined_populations_nicenames <- factor(
indicators_full$defined_populations_simplified,
levels = levels(as.factor(indicators_full$defined_populations_simplified)),
labels = nice_names)
# metadata
metadata$defined_populations_nicenames <- factor(
metadata$defined_populations_simplified,
levels = levels(as.factor(metadata$defined_populations_simplified)),
labels = nice_names)
#check names match
select(metadata, defined_populations_nicenames, defined_populations_simplified)
levels(indicators_full$defined_populations_nicenames)
## [1] "dispersal buffer"
## [2] "eco- biogeographic proxies"
## [3] "genetic clusters"
## [4] "genetic clusters & eco- biogeographic proxies"
## [5] "genetic clusters & geographic boundaries"
## [6] "geographic boundaries"
## [7] "geographic boundaries & eco- biogeographic proxies"
## [8] "geographic boundaries & management units"
## [9] "management units"
## [10] "other"
## [11] "other combinations"
Averaging multiassessments
Some taxa were assessed twice or more times, for example to account
for uncertainty on how to divide populations. This information is stored
in variable multiassessment of the metadata (created by
get_metadata()). An example of taxa with multiple
assessments:
metadata %>%
filter(multiassessment=="multiassessment") %>%
select(taxonomic_group, taxon, country_assessment, multiassessment) %>%
arrange(taxon, country_assessment) %>%
head()
Multiassessments allow to account for uncertainty in the number of
populations or the size of them. We can examine how the indicators value
species by species as done elsewhere in these analyses (see below
“Values for indicator 1 and 2 for multiassessed species), but to examine
global trends, some of the figures below use the average. The
averages are stored in a different column, labeled
indicator[1 or 2]_mean.
indicators_averaged<-indicators_full %>%
# group desired multiassessments
group_by(country_assessment, multiassessment, taxon) %>%
# estimate means
mutate(indicator1_mean=mean(indicator1, na.rm=TRUE)) %>%
mutate(indicator2_mean=mean(indicator2, na.rm=TRUE)) %>%
# change NaN for NA (needed due to the NAs and 0s in the dataset)
mutate_all(~ifelse(is.nan(.), NA, .))
## `mutate_all()` ignored the following grouping variables:
## • Columns `country_assessment`, `multiassessment`, `taxon`
## ℹ Use `mutate_at(df, vars(-group_cols()), myoperation)` to silence the message.
Examples of how this looks to check it was done properly. For
indicator 1:
indicators_averaged %>%
filter(taxon == "Barbastella barbastellus") %>%
select(taxon, country_assessment, multiassessment, indicator1, indicator1_mean)
indicators_averaged %>%
filter(taxon == "Rana dalmatina") %>%
select(taxon, country_assessment, multiassessment, indicator1, indicator1_mean)
indicators_averaged %>%
filter(taxon == "Ambystoma cingulatum") %>%
select(taxon, country_assessment, multiassessment, indicator1, indicator1_mean)
For Proportion of maintained populations (indicator):
indicators_averaged %>%
filter(taxon == "Ambystoma cingulatum") %>%
select(taxon, country_assessment, multiassessment, indicator2, indicator2_mean)
Because we will use the averages to show a single value for
multiasssessed taxa, we can keep only the first record for multiassessed
taxa.
indicators_averaged_one<-indicators_averaged[!duplicated(cbind(indicators_averaged$taxon, indicators_averaged$country_assessment)), ]
General description of records and taxa assessed by country
Records by country, including taxa assessed more than once (see below
for details on this)
ggplot(metadata, aes(x=country_assessment)) +
geom_bar(stat = "count") +
xlab("") +
ggtitle("Number of taxa assessed by country, including taxa assed more than once") +
theme_light()

To explore what kind of taxa countries assessed regardless of if they
assessed them once or more, we are going to use the subset
indicators_averaged_one, were we averaged the indicators
and kept only 1 record per assessment.
How many taxa were assessed (i.e. counting only once taxa that were
assessed multiple times)?
# how many?
nrow(indicators_averaged_one)
## [1] 909
Plot taxa assessed excluding duplicates, i.e. the real number of taxa
assessed:
p1<-ggplot(indicators_averaged_one, aes(x=country_assessment)) +
geom_bar(stat = "count") +
xlab("") +
ggtitle("Number of taxa assessed by country") +
theme_light()
p1

Of which countries and taxonomic groups are the taxa that were
assessed more than once?
p2<- indicators_averaged_one %>% # we use the _unique dataset so that multiassesed records are counted only once
filter(multiassessment=="multiassessment") %>%
ggplot(aes(x=taxonomic_group, fill=country_assessment)) +
geom_bar(stat = "count") +
theme(axis.text.x = element_text(angle = 45)) +
labs(fill="Country") +
xlab("") +
ggtitle("Number of taxa assessed more than once") +
theme_light()
p2
### Supplementary Figure: Number of species and multiassessed species
per country
plot_grid(p1 + ggtitle(""),
p2 + ggtitle(""), ncol = 1, labels = c("a)", "b)"))

Population size data (Has Nc or Ne? what type of Nc?)
Supplementary Figure: Population size data availability by
country
Countries have population size data (Nc or Ne) regardless of the
taxonomic group. The last panel includes the entire dataset:
## Duplicate data with an additional column "facet"
df<-CreateAllFacet(metadata, "country_assessment")
# order with "all" as last
df$facet <- factor(df$facet, levels=c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US", "all"))
# Plot
ggplot(df, aes(x=taxonomic_group, fill=popsize_data)) +
geom_bar(stat = "count") +
coord_flip() +
facet_wrap(~facet, ncol = 5, scales="free_x") +
scale_fill_manual(values=c("#2ca02c", "#1f77b4", "grey80"),
breaks=c("yes", "data_for_species", "insuff_data_species"),
labels=c("Population level", "Species or subspecies level", "Insufficient data")) + labs(fill="Population size data availability",
x="",
y="Number of taxa (including records of taxa assessed more than once)") +
theme_light() +
theme(panel.border = element_blank(), legend.position="top")

Population size data availability in the entire dataset:
ggplot(metadata, aes(x=taxonomic_group, fill=popsize_data)) +
geom_bar(stat = "count") +
coord_flip() +
scale_fill_manual(values=c("#1f77b4", "grey80", "#2ca02c"),
breaks=c(levels(as.factor(metadata$popsize_data))),
labels=c("Species level or subspecies level", "Insufficient data", "Population level")) +
labs(fill="Population size data availability",
x="",
y="Number of taxa (including records of taxa assessed more than once)") +
theme_light() +
theme(legend.position="right")

Ne data yes or not? & Type of Nc data
Ne available by taxa? (species level)
p1<- metadata %>%
filter(!is.na(ne_pops_exists)) %>%
filter(ne_pops_exists!="other_genetic_info") %>%
ggplot(aes(x=country_assessment, fill=ne_pops_exists)) +
geom_bar() +
scale_fill_manual(labels=c("no", "yes"),
breaks=c("no_genetic_data", "ne_available"),
values=c("#ff7f0e", "#2ca02c")) +
xlab("") +
ylab("Number of taxa") +
labs(fill="Ne available \n(from genetic data)") +
theme_light() +
theme(text = element_text(size = 13), legend.position = "right", panel.border = element_blank())
p1

Nc data available by taxa? (species level)
p2<-metadata %>%
filter(!is.na(nc_pops_exists)) %>%
ggplot(aes(x=country_assessment, fill=nc_pops_exists)) +
geom_bar() +
scale_fill_manual(values=c("#ff7f0e", "#2ca02c")) +
labs(fill="Nc available") +
xlab("") +
ylab("Number of taxa") +
theme_light() +
theme(text = element_text(size = 13), legend.position = "right", panel.border = element_blank())
p2

What kind of Nc data? (dodge bars) This is at population level.
ind1_data %>%
filter(!is.na(NcType)) %>%
ggplot(aes(x=country_assessment, fill=NcType))+
geom_bar(position = "dodge") +
scale_fill_manual(labels=c("Point", "Range \nor qualitative", "Unknown"),
breaks=c("Nc_point", "Nc_range", "unknown"),
values=c("#0072B2", "#E69F00", "grey80")) +
xlab("") +
ylab("Number of populations") +
labs(fill="Type of Nc data \nby population") +
theme_light() +
theme(text = element_text(size = 13), legend.position = "right", panel.border = element_blank())

What kind of Nc data? (fill bars). This is at population level.
p3<-ind1_data %>%
filter(!is.na(NcType)) %>%
ggplot(aes(x=country_assessment, fill=NcType))+
geom_bar(position = "fill", color="white") +
scale_fill_manual(labels=c("Point", "Range \nor qualitative", "Unknown"),
breaks=c("Nc_point", "Nc_range", "unknown"),
values=c("#0072B2", "#E69F00", "grey80")) +
xlab("") +
ylab("Proportion of populations") +
labs(fill="Type of Nc data \nby population") +
theme_light() +
theme(text = element_text(size = 13), legend.position = "right", panel.border = element_blank())
p3

Data availability at the population level cosidering Ne and Nc
combined. This plot shows where data came from for the Ne value used for
estimating the indicator.
p4<-ind1_data %>%
mutate(Ne_calculated_from = replace_na(Ne_calculated_from, "NA")) %>%
ggplot(aes(x=country_assessment, fill=Ne_calculated_from))+
geom_bar(position = "fill", color="white") +
scale_fill_manual(labels=c("genetic data", "NcPoint ratio", "NcRange ratio", "NA"),
breaks=c("genetic data", "NcPoint ratio", "NcRange ratio", "NA"),
values=c("darkgreen", "#0072B2", "#E69F00", "grey80")) +
xlab("") +
scale_x_discrete(limits=rev) +
ylab("Proportion of populations") +
labs(fill="Data used to estimate Ne") +
theme_light() +
coord_flip() +
theme(text = element_text(size = 13), legend.position = "bottom", panel.border = element_blank())
p4

Range of values for Ne and Nc data
Range of Ne values by taxonomic group, without possible outliers (Ne
> 100000)
ind1_data %>%
filter(Ne < 100000) %>%
filter(!is.na(Ne)) %>%
ggplot(aes(x=taxonomic_group, y=Ne)) +
geom_boxplot(color="grey50") +
geom_jitter(size=.5, width = 0.1, color="darkred") +
xlab("") +
theme_light() +
theme(axis.text.x = element_text(angle = 45))

Check outliers
ind1_data %>%
filter(Ne > 100000) %>%
select(country_assessment, name_assessor, taxon, taxonomic_group, Ne, NeLower, NeUpper, multiassessment, population)
Range of Nc values (actual data point provided) by taxonomic group.
Without possible outliers.
ind1_data %>%
filter(!is.na(NcPoint)) %>%
filter(NcPoint < 10000000) %>%
ggplot(aes(x=taxonomic_group, y=NcPoint)) +
geom_boxplot(color="grey50") +
geom_jitter(size=.5, width = 0.1, color="darkred") +
xlab("") +
theme_light() +
theme(axis.text.x = element_text(angle = 45))

Check outliers
ind1_data %>%
filter(NcPoint > 10000000) %>%
select(country_assessment, name_assessor, taxon, taxonomic_group, population, NcPoint, NcLower, NcUpper, multiassessment, population)
Range of Ne values by taxonomic group from different sources. Without
possible outliers.
ind1_data %>%
filter(!is.na(Ne_combined)) %>%
filter(Ne < 100000) %>%
ggplot(aes(x=taxonomic_group, y=Ne_combined)) +
geom_boxplot(color="grey50") +
geom_jitter(size=.5, width = 0.1, color="darkred") +
xlab("") +
theme_light() +
theme(axis.text.x = element_text(angle = 45))

Range of Ne values by taxonomic group from different sources. Zoom to
Ne < 10,000
ind1_data %>%
filter(!is.na(Ne_combined)) %>%
filter(Ne < 10000) %>%
ggplot(aes(x=taxonomic_group, y=Ne_combined)) +
geom_boxplot(color="grey50") +
geom_jitter(size=.5, width = 0.1, color="darkred") +
xlab("") +
theme_light() +
theme(axis.text.x = element_text(angle = 45))

Missing data on extant and extinct populations
We have NA in Proportion of maintained populations (indicator)
because in some cases the number of extinct populations is unknown,
therefore the operation cannot be computed.
Counts
Total records with NA in extant populations:
sum(is.na(indicators_full$n_extant_populations))
## [1] 19
Taxa with NA in extant populations:
indicators_full %>%
filter(is.na(n_extant_populations)) %>%
select(country_assessment, taxonomic_group, taxon, n_extant_populations, n_extint_populations)
Total taxa with NA in extinct populations:
sum(is.na(indicators_full$n_extint_populations))
## [1] 379
Do taxa with NA for extant also have NA for extinct?
indicators_full$taxon[is.na(indicators_full$n_extant_populations)] %in% indicators_full$taxon[is.na(indicators_full$n_extint_populations)]
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE
So out of the 972, we have 379 records with NA in
n_extinct and 19 records with NA in n_extant.
Of them, 19 have NA in both n_extant and n_extinct.
Plot missing data extinct populations
p5<-indicators_full %>%
ggplot(aes(x=country_assessment, fill=is.na(n_extint_populations))) +
geom_bar() +
scale_fill_manual(labels=c("number of populations known", "missing data"),
values=c("#2ca02c", "#ff7f0e")) +
labs(fill="Extinct populations") +
xlab("") + ylab("Number of taxa") +
theme_light() +
theme(text = element_text(size = 13), legend.position = "right", panel.border = element_blank())
p5

Main Figure: Data availability to estimate Ne indciator (origion of
data to estimate Ne) and PM indicator (missing data on pop
extinction):
Distribution of Nc, Ne and types of Ne in a single figure with 3
panels, using count for a & b, and proportions for c:
# plot
plot_grid(p4 + theme(legend.position = "right", legend.justification = c(0,.5)), # legend.justification aligns legends
p5 + coord_flip() +
scale_x_discrete(limits=rev) +
theme(legend.position = "right", legend.justification = c(0,.5)),
ncol = 1, labels = c("a)", "b)"), align = "v")

plot_grid(p1 + theme(legend.justification = c(0,.5)),
p2 + theme(legend.justification = c(0,.5)),
p3 + theme(legend.justification = c(0,.5)),
p4 + theme(legend.justification = c(0,.5)),
ncol=1, rel_widths = c(1,1,1,1), align = "v", labels=c("a)", "b)", "c)", "d)"), vjust = .7)
## Warning: Graphs cannot be vertically aligned unless the axis parameter is set.
## Placing graphs unaligned.

Main Figure: Method to define populations used by country and taxa
(alluvial)
Reformat data
select(metadata, defined_populations_nicenames, defined_populations_simplified)
# reformat data
foralluvial<-metadata %>% group_by(country_assessment, defined_populations_nicenames, taxonomic_group) %>%
summarise(n=n())
## `summarise()` has grouped output by 'country_assessment',
## 'defined_populations_nicenames'. You can override using the `.groups` argument.
# define colors
my_cols<- simplifiedmethods_colors
# we need a vector of colors by country for each row of the dataset, so:
methodspop<-as.factor(foralluvial$defined_populations_nicenames)
levels(methodspop)<-my_cols
methodspop<-as.vector(methodspop)
head(methodspop)
## [1] "#668cd1" "#668cd1" "#668cd1" "#668cd1" "#668cd1" "#45c097"
Plot
# plot
alluvial(foralluvial[,1:3], freq = foralluvial$n,
col=methodspop,
blocks=FALSE,
gap.width = 0.5,
cex=.8,
xw = 0.1,
cw = 0.2,
border = NA,
alpha = .7)

Effect of method used to define populations on number of populations
and PM and Ne>500 indicators
The analyses and plots below us a subset of data filtering outliers
(>500 populations) and using the simplified methods (see above).
Multiassessed species are considered independently (each assessment is a
data point).
Supplementary Figure: Number of maintained populations by country
and method
Visualizing the Number of maintained populations by country and
method is useful to interpret the models that would be run below.
indicators_full %>%
filter(n_extant_populations<500) %>% # filter outliers
# order countries vertically by similar number of pops
mutate(country_assessment = factor(country_assessment,
levels=c("Colombia", "Australia", "Belgium",
"Mexico", "France", "US",
"S. Africa", "Japan", "Sweden"))) %>%
ggplot(aes(x=defined_populations_nicenames, y=n_extant_populations,
fill=defined_populations_nicenames, color=defined_populations_nicenames)) +
geom_boxplot() +
geom_jitter(size=.3, width = 0.1, color="black") +
coord_flip() +
facet_wrap(country_assessment ~ ., nrow=3, scales="free_x") +
xlab("") +
ylab("Number of maintained populations") +
scale_fill_manual(values=alpha(simplifiedmethods_colors, .3),
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_color_manual(values=simplifiedmethods_colors,
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_x_discrete(limits=rev) +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
text = element_text(size = 15))

Sampling size and plot of Ne values and Ne indicator by Ne origin
and method to define populations
This is useful to interpret the models that would be run below.
ind1_data %>%
filter(Ne_combined < 100000) %>% # filter outliers
ggplot(aes(x=defined_populations_simplified, y=Ne_combined,
color=Ne_calculated_from)) +
geom_boxplot(position = "dodge") +
geom_jitter(position = position_dodge(width = 0.75)) +
facet_wrap(country_assessment ~ ., nrow=3) +
coord_flip() +
theme_light()

Zoom to Ne 500
ind1_data %>%
filter(Ne_combined < 100000) %>% # filter outliers
ggplot(aes(x=defined_populations_simplified, y=Ne_combined,
color=Ne_calculated_from)) +
ylim(0,2000)+
geom_boxplot(position = "dodge") +
geom_jitter(position = position_dodge(width = 0.75)) +
facet_wrap(country_assessment ~ ., nrow=3) +
coord_flip() +
theme_light()
## Warning: Removed 105 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 105 rows containing missing values (`geom_point()`).

Summary table for sampling size by method and source of Ne:
x<- ind1_data %>%
filter(!is.na(Ne_calculated_from)) %>%
group_by(defined_populations_simplified, Ne_calculated_from) %>%
summarise(n=n())
## `summarise()` has grouped output by 'defined_populations_simplified'. You can
## override using the `.groups` argument.
kable(x)
| dispersal_buffer |
genetic data |
10 |
| dispersal_buffer |
NcPoint ratio |
225 |
| dispersal_buffer |
NcRange ratio |
1114 |
| eco_biogeo_proxies |
genetic data |
8 |
| eco_biogeo_proxies |
NcPoint ratio |
11 |
| eco_biogeo_proxies |
NcRange ratio |
66 |
| genetic_clusters |
genetic data |
43 |
| genetic_clusters |
NcPoint ratio |
32 |
| genetic_clusters |
NcRange ratio |
59 |
| genetic_clusters eco_biogeo_proxies |
genetic data |
4 |
| genetic_clusters eco_biogeo_proxies |
NcPoint ratio |
3 |
| genetic_clusters eco_biogeo_proxies |
NcRange ratio |
18 |
| genetic_clusters geographic_boundaries |
genetic data |
44 |
| genetic_clusters geographic_boundaries |
NcPoint ratio |
34 |
| genetic_clusters geographic_boundaries |
NcRange ratio |
83 |
| geographic_boundaries |
genetic data |
142 |
| geographic_boundaries |
NcPoint ratio |
404 |
| geographic_boundaries |
NcRange ratio |
421 |
| geographic_boundaries eco_biogeo_proxies |
genetic data |
8 |
| geographic_boundaries eco_biogeo_proxies |
NcPoint ratio |
68 |
| geographic_boundaries eco_biogeo_proxies |
NcRange ratio |
197 |
| geographic_boundaries management_units |
genetic data |
29 |
| geographic_boundaries management_units |
NcPoint ratio |
189 |
| geographic_boundaries management_units |
NcRange ratio |
22 |
| management_units |
NcPoint ratio |
48 |
| management_units |
NcRange ratio |
76 |
| other |
NcPoint ratio |
3 |
| other |
NcRange ratio |
14 |
| other_combinations |
genetic data |
61 |
| other_combinations |
NcPoint ratio |
130 |
| other_combinations |
NcRange ratio |
492 |
Same as above but adding country:
x<- ind1_data %>%
filter(!is.na(Ne_calculated_from)) %>%
group_by(country_assessment, defined_populations_simplified, Ne_calculated_from) %>%
summarise(n=n())
## `summarise()` has grouped output by 'country_assessment',
## 'defined_populations_simplified'. You can override using the `.groups`
## argument.
kable(x)
| Australia |
genetic_clusters |
genetic data |
7 |
| Australia |
genetic_clusters |
NcPoint ratio |
8 |
| Australia |
genetic_clusters geographic_boundaries |
genetic data |
15 |
| Australia |
genetic_clusters geographic_boundaries |
NcPoint ratio |
13 |
| Australia |
genetic_clusters geographic_boundaries |
NcRange ratio |
7 |
| Australia |
geographic_boundaries |
genetic data |
15 |
| Australia |
geographic_boundaries |
NcPoint ratio |
76 |
| Australia |
geographic_boundaries |
NcRange ratio |
59 |
| Australia |
geographic_boundaries management_units |
NcPoint ratio |
8 |
| Australia |
geographic_boundaries management_units |
NcRange ratio |
3 |
| Australia |
management_units |
NcRange ratio |
3 |
| Australia |
other_combinations |
NcRange ratio |
4 |
| Belgium |
dispersal_buffer |
genetic data |
10 |
| Belgium |
dispersal_buffer |
NcPoint ratio |
8 |
| Belgium |
dispersal_buffer |
NcRange ratio |
844 |
| Belgium |
genetic_clusters |
NcRange ratio |
7 |
| Belgium |
other_combinations |
genetic data |
40 |
| Belgium |
other_combinations |
NcPoint ratio |
2 |
| Belgium |
other_combinations |
NcRange ratio |
379 |
| Colombia |
geographic_boundaries eco_biogeo_proxies |
NcPoint ratio |
4 |
| Colombia |
geographic_boundaries eco_biogeo_proxies |
NcRange ratio |
43 |
| Colombia |
other_combinations |
NcRange ratio |
1 |
| France |
eco_biogeo_proxies |
genetic data |
7 |
| France |
genetic_clusters |
genetic data |
3 |
| France |
genetic_clusters |
NcRange ratio |
1 |
| France |
genetic_clusters eco_biogeo_proxies |
genetic data |
3 |
| France |
genetic_clusters eco_biogeo_proxies |
NcPoint ratio |
1 |
| France |
genetic_clusters geographic_boundaries |
genetic data |
6 |
| France |
genetic_clusters geographic_boundaries |
NcPoint ratio |
6 |
| France |
genetic_clusters geographic_boundaries |
NcRange ratio |
7 |
| France |
geographic_boundaries |
NcPoint ratio |
12 |
| France |
geographic_boundaries |
NcRange ratio |
22 |
| France |
geographic_boundaries eco_biogeo_proxies |
NcPoint ratio |
1 |
| France |
geographic_boundaries eco_biogeo_proxies |
NcRange ratio |
2 |
| France |
geographic_boundaries management_units |
genetic data |
15 |
| France |
geographic_boundaries management_units |
NcPoint ratio |
38 |
| France |
geographic_boundaries management_units |
NcRange ratio |
12 |
| France |
management_units |
NcPoint ratio |
10 |
| France |
management_units |
NcRange ratio |
8 |
| France |
other_combinations |
genetic data |
3 |
| France |
other_combinations |
NcPoint ratio |
20 |
| France |
other_combinations |
NcRange ratio |
10 |
| Japan |
dispersal_buffer |
NcPoint ratio |
213 |
| Japan |
dispersal_buffer |
NcRange ratio |
232 |
| Japan |
geographic_boundaries |
NcPoint ratio |
1 |
| Japan |
geographic_boundaries |
NcRange ratio |
5 |
| Mexico |
genetic_clusters |
genetic data |
13 |
| Mexico |
genetic_clusters |
NcPoint ratio |
15 |
| Mexico |
genetic_clusters |
NcRange ratio |
24 |
| Mexico |
genetic_clusters eco_biogeo_proxies |
genetic data |
1 |
| Mexico |
genetic_clusters eco_biogeo_proxies |
NcRange ratio |
17 |
| Mexico |
genetic_clusters geographic_boundaries |
genetic data |
2 |
| Mexico |
genetic_clusters geographic_boundaries |
NcPoint ratio |
6 |
| Mexico |
genetic_clusters geographic_boundaries |
NcRange ratio |
15 |
| Mexico |
geographic_boundaries |
NcRange ratio |
75 |
| Mexico |
other |
NcRange ratio |
1 |
| Mexico |
other_combinations |
genetic data |
4 |
| Mexico |
other_combinations |
NcRange ratio |
26 |
| S. Africa |
genetic_clusters |
genetic data |
12 |
| S. Africa |
genetic_clusters |
NcPoint ratio |
3 |
| S. Africa |
genetic_clusters |
NcRange ratio |
6 |
| S. Africa |
genetic_clusters eco_biogeo_proxies |
NcPoint ratio |
2 |
| S. Africa |
genetic_clusters eco_biogeo_proxies |
NcRange ratio |
1 |
| S. Africa |
genetic_clusters geographic_boundaries |
genetic data |
2 |
| S. Africa |
genetic_clusters geographic_boundaries |
NcPoint ratio |
2 |
| S. Africa |
genetic_clusters geographic_boundaries |
NcRange ratio |
11 |
| S. Africa |
geographic_boundaries |
genetic data |
2 |
| S. Africa |
geographic_boundaries |
NcPoint ratio |
28 |
| S. Africa |
geographic_boundaries |
NcRange ratio |
21 |
| S. Africa |
geographic_boundaries management_units |
NcRange ratio |
1 |
| S. Africa |
management_units |
NcPoint ratio |
1 |
| S. Africa |
other |
NcRange ratio |
1 |
| S. Africa |
other_combinations |
genetic data |
2 |
| S. Africa |
other_combinations |
NcPoint ratio |
8 |
| S. Africa |
other_combinations |
NcRange ratio |
4 |
| Sweden |
dispersal_buffer |
NcPoint ratio |
4 |
| Sweden |
dispersal_buffer |
NcRange ratio |
38 |
| Sweden |
eco_biogeo_proxies |
NcRange ratio |
26 |
| Sweden |
genetic_clusters |
genetic data |
7 |
| Sweden |
genetic_clusters |
NcPoint ratio |
3 |
| Sweden |
genetic_clusters |
NcRange ratio |
11 |
| Sweden |
genetic_clusters geographic_boundaries |
genetic data |
19 |
| Sweden |
genetic_clusters geographic_boundaries |
NcPoint ratio |
6 |
| Sweden |
genetic_clusters geographic_boundaries |
NcRange ratio |
41 |
| Sweden |
geographic_boundaries |
genetic data |
2 |
| Sweden |
geographic_boundaries |
NcPoint ratio |
67 |
| Sweden |
geographic_boundaries |
NcRange ratio |
168 |
| Sweden |
geographic_boundaries management_units |
NcPoint ratio |
3 |
| Sweden |
geographic_boundaries management_units |
NcRange ratio |
5 |
| Sweden |
management_units |
NcPoint ratio |
12 |
| Sweden |
other |
NcRange ratio |
10 |
| Sweden |
other_combinations |
genetic data |
3 |
| Sweden |
other_combinations |
NcPoint ratio |
7 |
| Sweden |
other_combinations |
NcRange ratio |
46 |
| US |
eco_biogeo_proxies |
genetic data |
1 |
| US |
eco_biogeo_proxies |
NcPoint ratio |
11 |
| US |
eco_biogeo_proxies |
NcRange ratio |
40 |
| US |
genetic_clusters |
genetic data |
1 |
| US |
genetic_clusters |
NcPoint ratio |
3 |
| US |
genetic_clusters |
NcRange ratio |
10 |
| US |
genetic_clusters geographic_boundaries |
NcPoint ratio |
1 |
| US |
genetic_clusters geographic_boundaries |
NcRange ratio |
2 |
| US |
geographic_boundaries |
genetic data |
123 |
| US |
geographic_boundaries |
NcPoint ratio |
220 |
| US |
geographic_boundaries |
NcRange ratio |
71 |
| US |
geographic_boundaries eco_biogeo_proxies |
genetic data |
8 |
| US |
geographic_boundaries eco_biogeo_proxies |
NcPoint ratio |
63 |
| US |
geographic_boundaries eco_biogeo_proxies |
NcRange ratio |
152 |
| US |
geographic_boundaries management_units |
genetic data |
14 |
| US |
geographic_boundaries management_units |
NcPoint ratio |
140 |
| US |
geographic_boundaries management_units |
NcRange ratio |
1 |
| US |
management_units |
NcPoint ratio |
25 |
| US |
management_units |
NcRange ratio |
65 |
| US |
other |
NcPoint ratio |
3 |
| US |
other |
NcRange ratio |
2 |
| US |
other_combinations |
genetic data |
9 |
| US |
other_combinations |
NcPoint ratio |
93 |
| US |
other_combinations |
NcRange ratio |
22 |
(a) Does the number of maintained pops vary with method used?
Plot number of populations by method
# Prepare data for plot with nice labels:
# sample size of TOTAL populations
sample_size <- indicators_full %>%
filter(!is.na(n_extant_populations)) %>%
filter(n_extant_populations<500) %>%
group_by(defined_populations_nicenames) %>% summarize(num=n())
# custom axis
## new dataframe
df<-indicators_full %>%
filter(!is.na(n_extant_populations)) %>%
filter(n_extant_populations<500) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(defined_populations_nicenames, " (n= ", num, ")")) %>%
#myaxis needs levels in the same order than defined_populations_nicenames
mutate(myaxis = factor(myaxis,
levels=levels(as.factor(myaxis))[c(1,12,2:11,13)])) # reorders levels
## Joining, by = "defined_populations_nicenames"
# plot for number of pops
p1<- df %>%
ggplot(aes(x=myaxis, y=n_extant_populations, color=defined_populations_nicenames,
fill=defined_populations_nicenames)) +
geom_boxplot() + xlab("") + ylab("Number of maintained populations") +
geom_jitter(size=.4, width = 0.1, color="black") +
coord_flip() +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
plot.margin = unit(c(0, 0, 0, 0), "cm")) + # this is used to decrease the space between plots
scale_color_manual(values=simplifiedmethods_colors,
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_x_discrete(limits=rev) +
theme(text = element_text(size = 13))
p1

Try showing species range type:
library(ggnewscale)
# points with species range type
p1.1<- df %>%
ggplot(aes(x=myaxis, y=n_extant_populations)) +
geom_boxplot(aes(color=defined_populations_nicenames,
fill=defined_populations_nicenames)) +
xlab("") + ylab("Number of maintained populations") +
coord_flip() +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
plot.margin = unit(c(0, 0, 0, 0), "cm")) + # this is used to decrease the space between plots
scale_color_manual(values=simplifiedmethods_colors,
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
new_scale_color() +
geom_jitter(size=1, width = 0.1, aes(color = species_range)) +
theme(text = element_text(size = 13))
p1.1

Prepare data for model (remove outliers and NA in desired variable)
and check n:
# remove missing data
data_for_model<-indicators_full %>%
filter(!is.na(n_extant_populations)) %>%
filter(species_range !="unknown") %>% # we remove "unknonw" because its n is too low, thus unbalancing the model
filter(n_extant_populations<500) # doesn't make a difference in the test below, but useful for plots
# check n per method
table(data_for_model$defined_populations_simplified)
##
## dispersal_buffer
## 149
## eco_biogeo_proxies
## 43
## genetic_clusters
## 104
## genetic_clusters eco_biogeo_proxies
## 25
## genetic_clusters geographic_boundaries
## 67
## geographic_boundaries
## 269
## geographic_boundaries eco_biogeo_proxies
## 88
## geographic_boundaries management_units
## 24
## management_units
## 27
## other
## 14
## other_combinations
## 106
# total n
nrow(data_for_model)
## [1] 916
# re-level to use geographic boundaries as reference category for the analysis
data_for_model$defined_populations_simplified<-relevel(as.factor(data_for_model$defined_populations_simplified),
ref="geographic_boundaries")
# make sure specis range is a factor
data_for_model$species_range<-as.factor(data_for_model$species_range)
Run model asking: Does the number of maintained pops vary with method
used? Test controlling for variation in the number of maintaiend
populations among countries:
m1<-glmer(data_for_model$n_extant_populations ~ data_for_model$defined_populations_simplified +
(1|data_for_model$country_assessment), family ="poisson")
See results:
summary(m1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula:
## data_for_model$n_extant_populations ~ data_for_model$defined_populations_simplified +
## (1 | data_for_model$country_assessment)
##
## AIC BIC logLik deviance df.resid
## 28247.7 28305.6 -14111.9 28223.7 904
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.332 -2.953 -1.240 0.283 74.519
##
## Random effects:
## Groups Name Variance Std.Dev.
## data_for_model$country_assessment (Intercept) 1.038 1.019
## Number of obs: 916, groups: data_for_model$country_assessment, 9
##
## Fixed effects:
## Estimate
## (Intercept) 2.37353
## data_for_model$defined_populations_simplifieddispersal_buffer -0.98296
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies 0.05530
## data_for_model$defined_populations_simplifiedgenetic_clusters -1.25162
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies -1.48223
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.20237
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.03769
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units -0.11261
## data_for_model$defined_populations_simplifiedmanagement_units -0.44823
## data_for_model$defined_populations_simplifiedother -1.24185
## data_for_model$defined_populations_simplifiedother_combinations -0.54510
## Std. Error
## (Intercept) 0.34038
## data_for_model$defined_populations_simplifieddispersal_buffer 0.05291
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies 0.03231
## data_for_model$defined_populations_simplifiedgenetic_clusters 0.06198
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.08929
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.03470
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.03961
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units 0.05066
## data_for_model$defined_populations_simplifiedmanagement_units 0.05449
## data_for_model$defined_populations_simplifiedother 0.11150
## data_for_model$defined_populations_simplifiedother_combinations 0.03467
## z value
## (Intercept) 6.973
## data_for_model$defined_populations_simplifieddispersal_buffer -18.576
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies 1.711
## data_for_model$defined_populations_simplifiedgenetic_clusters -20.194
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies -16.600
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries 5.833
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.952
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units -2.223
## data_for_model$defined_populations_simplifiedmanagement_units -8.225
## data_for_model$defined_populations_simplifiedother -11.138
## data_for_model$defined_populations_simplifiedother_combinations -15.722
## Pr(>|z|)
## (Intercept) 3.10e-12
## data_for_model$defined_populations_simplifieddispersal_buffer < 2e-16
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies 0.0871
## data_for_model$defined_populations_simplifiedgenetic_clusters < 2e-16
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies < 2e-16
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries 5.46e-09
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.3413
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units 0.0262
## data_for_model$defined_populations_simplifiedmanagement_units < 2e-16
## data_for_model$defined_populations_simplifiedother < 2e-16
## data_for_model$defined_populations_simplifiedother_combinations < 2e-16
##
## (Intercept) ***
## data_for_model$defined_populations_simplifieddispersal_buffer ***
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies .
## data_for_model$defined_populations_simplifiedgenetic_clusters ***
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies ***
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries ***
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units *
## data_for_model$defined_populations_simplifiedmanagement_units ***
## data_for_model$defined_populations_simplifiedother ***
## data_for_model$defined_populations_simplifiedother_combinations ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_mdl$dfnd_ppltns_smplfdd_ -0.036
## dt_fr_$____ -0.015 0.088
## dt_fr_mdl$dfnd_ppltns_smplfdg_ -0.015 0.083
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__ -0.006 0.034
## dt_f_$___g_ -0.021 0.096
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__ -0.023 0.069
## dt_f_$___m_ -0.011 0.058
## dt_fr_mdl$dfnd_ppltns_smplfdm_ -0.010 0.054
## dt_fr_md$__ -0.005 0.028
## dt_fr_mdl$dfnd_ppltns_smplfdt_ -0.028 0.414
## d__$____ dt_fr_mdl$dfnd_ppltns_smplfdg_
## dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_$____
## dt_fr_mdl$dfnd_ppltns_smplfdg_ 0.092
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__ 0.094 0.042
## dt_f_$___g_ 0.172 0.131
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__ 0.223 0.073
## dt_f_$___m_ 0.140 0.060
## dt_fr_mdl$dfnd_ppltns_smplfdm_ 0.145 0.057
## dt_fr_md$__ 0.069 0.031
## dt_fr_mdl$dfnd_ppltns_smplfdt_ 0.185 0.120
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__ d__$_g
## dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_$____
## dt_fr_mdl$dfnd_ppltns_smplfdg_
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__
## dt_f_$___g_ 0.069
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__ 0.080 0.139
## dt_f_$___m_ 0.050 0.113
## dt_fr_mdl$dfnd_ppltns_smplfdm_ 0.052 0.106
## dt_fr_md$__ 0.026 0.054
## dt_fr_mdl$dfnd_ppltns_smplfdt_ 0.072 0.179
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__ d__$_m
## dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_$____
## dt_fr_mdl$dfnd_ppltns_smplfdg_
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__
## dt_f_$___g_
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__
## dt_f_$___m_ 0.118
## dt_fr_mdl$dfnd_ppltns_smplfdm_ 0.123 0.081
## dt_fr_md$__ 0.057 0.038
## dt_fr_mdl$dfnd_ppltns_smplfdt_ 0.153 0.113
## dt_fr_mdl$dfnd_ppltns_smplfdm_ dt__$__
## dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_$____
## dt_fr_mdl$dfnd_ppltns_smplfdg_
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__
## dt_f_$___g_
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__
## dt_f_$___m_
## dt_fr_mdl$dfnd_ppltns_smplfdm_
## dt_fr_md$__ 0.039
## dt_fr_mdl$dfnd_ppltns_smplfdt_ 0.111 0.055
Controling for species range type (wide or restricted):
m1.1<-glmer(data_for_model$n_extant_populations ~ data_for_model$defined_populations_simplified + data_for_model$species_range + (1|data_for_model$country_assessment), family ="poisson")
summary(m1.1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula:
## data_for_model$n_extant_populations ~ data_for_model$defined_populations_simplified +
## data_for_model$species_range + (1 | data_for_model$country_assessment)
##
## AIC BIC logLik deviance df.resid
## 24997.2 25059.9 -12485.6 24971.2 903
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -8.069 -2.896 -1.114 0.733 89.778
##
## Random effects:
## Groups Name Variance Std.Dev.
## data_for_model$country_assessment (Intercept) 0.8944 0.9458
## Number of obs: 916, groups: data_for_model$country_assessment, 9
##
## Fixed effects:
## Estimate
## (Intercept) 1.98388
## data_for_model$defined_populations_simplifieddispersal_buffer -1.26125
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies -0.13588
## data_for_model$defined_populations_simplifiedgenetic_clusters -1.55634
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies -1.97703
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.05534
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.20739
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units -0.13278
## data_for_model$defined_populations_simplifiedmanagement_units -0.84734
## data_for_model$defined_populations_simplifiedother -1.30432
## data_for_model$defined_populations_simplifiedother_combinations -0.77309
## data_for_model$species_rangewide_ranging 1.10154
## Std. Error
## (Intercept) 0.31635
## data_for_model$defined_populations_simplifieddispersal_buffer 0.05067
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies 0.03225
## data_for_model$defined_populations_simplifiedgenetic_clusters 0.06222
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.08953
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.03550
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.03963
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units 0.05084
## data_for_model$defined_populations_simplifiedmanagement_units 0.05480
## data_for_model$defined_populations_simplifiedother 0.11141
## data_for_model$defined_populations_simplifiedother_combinations 0.03493
## data_for_model$species_rangewide_ranging 0.01962
## z value
## (Intercept) 6.271
## data_for_model$defined_populations_simplifieddispersal_buffer -24.890
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies -4.214
## data_for_model$defined_populations_simplifiedgenetic_clusters -25.013
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies -22.083
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries 1.559
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -5.232
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units -2.612
## data_for_model$defined_populations_simplifiedmanagement_units -15.463
## data_for_model$defined_populations_simplifiedother -11.708
## data_for_model$defined_populations_simplifiedother_combinations -22.133
## data_for_model$species_rangewide_ranging 56.144
## Pr(>|z|)
## (Intercept) 3.59e-10
## data_for_model$defined_populations_simplifieddispersal_buffer < 2e-16
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies 2.51e-05
## data_for_model$defined_populations_simplifiedgenetic_clusters < 2e-16
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies < 2e-16
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.11903
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 1.67e-07
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units 0.00901
## data_for_model$defined_populations_simplifiedmanagement_units < 2e-16
## data_for_model$defined_populations_simplifiedother < 2e-16
## data_for_model$defined_populations_simplifiedother_combinations < 2e-16
## data_for_model$species_rangewide_ranging < 2e-16
##
## (Intercept) ***
## data_for_model$defined_populations_simplifieddispersal_buffer ***
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies ***
## data_for_model$defined_populations_simplifiedgenetic_clusters ***
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies ***
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies ***
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units **
## data_for_model$defined_populations_simplifiedmanagement_units ***
## data_for_model$defined_populations_simplifiedother ***
## data_for_model$defined_populations_simplifiedother_combinations ***
## data_for_model$species_rangewide_ranging ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_mdl$dfnd_ppltns_smplfdd_ -0.035
## dt_fr_$____ -0.013 0.093
## dt_fr_mdl$dfnd_ppltns_smplfdg_ -0.014 0.096
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__ -0.004 0.039
## dt_f_$___g_ -0.022 0.117
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__ -0.021 0.073
## dt_f_$___m_ -0.012 0.056
## dt_fr_mdl$dfnd_ppltns_smplfdm_ -0.007 0.063
## dt_fr_mdl$d__ -0.005 0.031
## dt_fr_mdl$dfnd_ppltns_smplfdt_ -0.027 0.419
## dt_fr_mdl$s__ -0.029 -0.090
## d__$____ dt_fr_mdl$dfnd_ppltns_smplfdg_
## dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_$____
## dt_fr_mdl$dfnd_ppltns_smplfdg_ 0.096
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__ 0.099 0.045
## dt_f_$___g_ 0.157 0.152
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__ 0.222 0.076
## dt_f_$___m_ 0.131 0.067
## dt_fr_mdl$dfnd_ppltns_smplfdm_ 0.152 0.064
## dt_fr_mdl$d__ 0.062 0.030
## dt_fr_mdl$dfnd_ppltns_smplfdt_ 0.178 0.131
## dt_fr_mdl$s__ -0.103 -0.082
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__ d__$_g
## dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_$____
## dt_fr_mdl$dfnd_ppltns_smplfdg_
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__
## dt_f_$___g_ 0.067
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__ 0.085 0.122
## dt_f_$___m_ 0.047 0.109
## dt_fr_mdl$dfnd_ppltns_smplfdm_ 0.060 0.102
## dt_fr_mdl$d__ 0.023 0.052
## dt_fr_mdl$dfnd_ppltns_smplfdt_ 0.073 0.196
## dt_fr_mdl$s__ -0.086 -0.073
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__ d__$_m
## dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_$____
## dt_fr_mdl$dfnd_ppltns_smplfdg_
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__
## dt_f_$___g_
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__
## dt_f_$___m_ 0.112
## dt_fr_mdl$dfnd_ppltns_smplfdm_ 0.130 0.077
## dt_fr_mdl$d__ 0.050 0.034
## dt_fr_mdl$dfnd_ppltns_smplfdt_ 0.148 0.106
## dt_fr_mdl$s__ -0.105 -0.005
## dt_fr_mdl$dfnd_ppltns_smplfdm_ dt_fr_mdl$d__
## dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_$____
## dt_fr_mdl$dfnd_ppltns_smplfdg_
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__
## dt_f_$___g_
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__
## dt_f_$___m_
## dt_fr_mdl$dfnd_ppltns_smplfdm_
## dt_fr_mdl$d__ 0.036
## dt_fr_mdl$dfnd_ppltns_smplfdt_ 0.114 0.053
## dt_fr_mdl$s__ -0.118 -0.010
## dt_fr_mdl$dfnd_ppltns_smplfdt_
## dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_$____
## dt_fr_mdl$dfnd_ppltns_smplfdg_
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__
## dt_f_$___g_
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__
## dt_f_$___m_
## dt_fr_mdl$dfnd_ppltns_smplfdm_
## dt_fr_mdl$d__
## dt_fr_mdl$dfnd_ppltns_smplfdt_
## dt_fr_mdl$s__ -0.110
(b) Does the proportion of maintained populations (indicator2) vary
with method used?
Plot
# Prepare data for plot with nice labels:
# sample size of TOTAL populations
sample_size <- indicators_full %>%
filter(!is.na(indicator2)) %>%
filter(n_extant_populations<500) %>%
group_by(defined_populations_nicenames) %>% summarize(num=n())
# custom axis
## new dataframe
df<-indicators_full %>%
filter(n_extant_populations<500) %>%
filter(!is.na(indicator2)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(defined_populations_nicenames, " (n= ", num, ")")) %>%
#myaxis needs levels in the same order than defined_populations_nicenames
mutate(myaxis = factor(myaxis,
levels=levels(as.factor(myaxis))[c(1,12,2:11,13)])) # reorders levels
## Joining, by = "defined_populations_nicenames"
## plot for Proportion of maintained populations (indicator)
p2<- df %>%
filter(n_extant_populations<500) %>%
ggplot(aes(x=myaxis, y=indicator2, color=defined_populations_nicenames,
fill=defined_populations_nicenames)) +
geom_boxplot() + xlab("") + ylab("Proportion of maintained populations") +
geom_jitter(size=.4, width = 0.1, color="black") +
coord_flip() +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
plot.margin = unit(c(0, 0, 0, 0), "cm")) + # this is used to decrease the space between plots)
scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_color_manual(values=simplifiedmethods_colors,
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_x_discrete(limits=rev) +
theme(text = element_text(size = 13))
p2

Try to show species range type
p2.1<- df %>%
filter(n_extant_populations<500) %>%
ggplot(aes(x=myaxis, y=indicator2, color=defined_populations_nicenames,
fill=defined_populations_nicenames)) +
geom_boxplot() + xlab("") + ylab("Proportion of maintained populations") +
coord_flip() +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
plot.margin = unit(c(0, 0, 0, 0), "cm")) + # this is used to decrease the space between plots)
scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_color_manual(values=simplifiedmethods_colors,
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_x_discrete(limits=rev) +
new_scale_color() +
geom_jitter(size=1, width = 0.1, aes(color = species_range)) +
theme(text = element_text(size = 13))
p2.1

Prepare data for model (remove outliers and NA in desired variable)
and check n:
# remove missing data
data_for_model<-indicators_full %>%
filter(!is.na(indicator2)) %>%
filter(n_extant_populations<500) # doesn't make a difference in the test below, but useful for plots
# check n per method
table(data_for_model$defined_populations_simplified)
##
## dispersal_buffer
## 78
## eco_biogeo_proxies
## 32
## genetic_clusters
## 51
## genetic_clusters eco_biogeo_proxies
## 18
## genetic_clusters geographic_boundaries
## 41
## geographic_boundaries
## 176
## geographic_boundaries eco_biogeo_proxies
## 68
## geographic_boundaries management_units
## 17
## management_units
## 23
## other
## 9
## other_combinations
## 78
# total n
nrow(data_for_model)
## [1] 591
# re-level to use geographic boundaries as reference category for the analysis
data_for_model$defined_populations_simplified<-relevel(as.factor(data_for_model$defined_populations_simplified),
ref="geographic_boundaries")
Run model asking: Does indicator 2 vary with method used? Controlling
for variation in indicator2 among countries:
m2<-glmmTMB(indicator2 ~ defined_populations_simplified + (1|country_assessment), family = "ordbeta", data = data_for_model)
See results:
summary(m2)
## Family: ordbeta ( logit )
## Formula:
## indicator2 ~ defined_populations_simplified + (1 | country_assessment)
## Data: data_for_model
##
## AIC BIC logLik deviance df.resid
## 729.0 794.7 -349.5 699.0 576
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.2805 0.5296
## Number of obs: 591, groups: country_assessment, 9
##
## Dispersion parameter for ordbeta family (): 3.99
##
## Conditional model:
## Estimate
## (Intercept) 0.58695
## defined_populations_simplifieddispersal_buffer 0.22299
## defined_populations_simplifiedeco_biogeo_proxies 0.06970
## defined_populations_simplifiedgenetic_clusters 0.51136
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.78299
## defined_populations_simplifiedgenetic_clusters geographic_boundaries -0.02414
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.04609
## defined_populations_simplifiedgeographic_boundaries management_units 0.32440
## defined_populations_simplifiedmanagement_units -0.16040
## defined_populations_simplifiedother 0.05444
## defined_populations_simplifiedother_combinations 0.42862
## Std. Error
## (Intercept) 0.21267
## defined_populations_simplifieddispersal_buffer 0.24555
## defined_populations_simplifiedeco_biogeo_proxies 0.21063
## defined_populations_simplifiedgenetic_clusters 0.25606
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.43594
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.21557
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.22820
## defined_populations_simplifiedgeographic_boundaries management_units 0.33471
## defined_populations_simplifiedmanagement_units 0.24622
## defined_populations_simplifiedother 0.50714
## defined_populations_simplifiedother_combinations 0.16657
## z value
## (Intercept) 2.760
## defined_populations_simplifieddispersal_buffer 0.908
## defined_populations_simplifiedeco_biogeo_proxies 0.331
## defined_populations_simplifiedgenetic_clusters 1.997
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 1.796
## defined_populations_simplifiedgenetic_clusters geographic_boundaries -0.112
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.202
## defined_populations_simplifiedgeographic_boundaries management_units 0.969
## defined_populations_simplifiedmanagement_units -0.651
## defined_populations_simplifiedother 0.107
## defined_populations_simplifiedother_combinations 2.573
## Pr(>|z|)
## (Intercept) 0.00578
## defined_populations_simplifieddispersal_buffer 0.36380
## defined_populations_simplifiedeco_biogeo_proxies 0.74071
## defined_populations_simplifiedgenetic_clusters 0.04583
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.07248
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.91083
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.83993
## defined_populations_simplifiedgeographic_boundaries management_units 0.33244
## defined_populations_simplifiedmanagement_units 0.51474
## defined_populations_simplifiedother 0.91452
## defined_populations_simplifiedother_combinations 0.01008
##
## (Intercept) **
## defined_populations_simplifieddispersal_buffer
## defined_populations_simplifiedeco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters *
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies .
## defined_populations_simplifiedgenetic_clusters geographic_boundaries
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies
## defined_populations_simplifiedgeographic_boundaries management_units
## defined_populations_simplifiedmanagement_units
## defined_populations_simplifiedother
## defined_populations_simplifiedother_combinations *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(c) Does the proportion of populations with Ne>500 (indicator1)
vary with method used?
Plot
# Prepare data for plot with nice labels:
# sample size of TOTAL populations
sample_size <- indicators_full %>%
filter(!is.na(indicator1)) %>%
filter(n_extant_populations<500) %>%
group_by(defined_populations_nicenames) %>% summarize(num=n())
# custom axis
## new dataframe
df<-indicators_full %>%
filter(n_extant_populations<500) %>%
filter(!is.na(indicator1)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(defined_populations_nicenames, " (n= ", num, ")")) %>%
#myaxis needs levels in the same order than defined_populations_nicenames
mutate(myaxis = factor(myaxis,
levels=levels(as.factor(myaxis))[c(1,12,2:11,13)])) # reorders levels
## Joining, by = "defined_populations_nicenames"
## plot
p3<- df %>%
ggplot(aes(x=myaxis, y=indicator1, color=defined_populations_nicenames,
fill=defined_populations_nicenames)) +
geom_boxplot() + xlab("") + ylab("Proportion of populations with Ne>500") +
geom_jitter(size=.4, width = 0.1, color="black") +
coord_flip() +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
plot.margin = unit(c(0, 0, 0, 0), "cm")) + # this is used to decrease the space between plots)
scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_color_manual(values=simplifiedmethods_colors,
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_x_discrete(limits=rev) +
theme(text = element_text(size = 13))
p3

Prepare data for model (remove outliers and NA in desired variable)
and check n:
# remove missing data
data_for_model<-indicators_full %>%
filter(!is.na(indicator1)) %>%
filter(n_extant_populations<500) # doesn't make a difference in the test below, but useful for plots
# check n per method
table(data_for_model$defined_populations_simplified)
##
## dispersal_buffer
## 138
## eco_biogeo_proxies
## 16
## genetic_clusters
## 58
## genetic_clusters eco_biogeo_proxies
## 8
## genetic_clusters geographic_boundaries
## 41
## geographic_boundaries
## 156
## geographic_boundaries eco_biogeo_proxies
## 54
## geographic_boundaries management_units
## 20
## management_units
## 13
## other
## 6
## other_combinations
## 66
# total n
nrow(data_for_model)
## [1] 576
# re-level to use geographic boundaries as reference category for the analysis
data_for_model$defined_populations_simplified<-relevel(as.factor(data_for_model$defined_populations_simplified),
ref="geographic_boundaries")
Run model asking: Does indicator 1 vary with method used? Controlling
for variation in indicator1 among countries:
m3<-glmmTMB(indicator1 ~ defined_populations_simplified + (1|country_assessment), family = "ordbeta", data = data_for_model)
See results:
summary(m3)
## Family: ordbeta ( logit )
## Formula:
## indicator1 ~ defined_populations_simplified + (1 | country_assessment)
## Data: data_for_model
##
## AIC BIC logLik deviance df.resid
## 1070.0 1135.4 -520.0 1040.0 561
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.08835 0.2972
## Number of obs: 576, groups: country_assessment, 9
##
## Dispersion parameter for ordbeta family (): 3.91
##
## Conditional model:
## Estimate
## (Intercept) -0.84812
## defined_populations_simplifieddispersal_buffer 0.29152
## defined_populations_simplifiedeco_biogeo_proxies -0.14418
## defined_populations_simplifiedgenetic_clusters 0.49361
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 1.01444
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.50566
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.08049
## defined_populations_simplifiedgeographic_boundaries management_units 0.60663
## defined_populations_simplifiedmanagement_units -0.06766
## defined_populations_simplifiedother 1.01734
## defined_populations_simplifiedother_combinations 0.37399
## Std. Error
## (Intercept) 0.17950
## defined_populations_simplifieddispersal_buffer 0.30486
## defined_populations_simplifiedeco_biogeo_proxies 0.31713
## defined_populations_simplifiedgenetic_clusters 0.23501
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.43880
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.24600
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.34136
## defined_populations_simplifiedgeographic_boundaries management_units 0.33305
## defined_populations_simplifiedmanagement_units 0.41663
## defined_populations_simplifiedother 0.62154
## defined_populations_simplifiedother_combinations 0.20694
## z value
## (Intercept) -4.725
## defined_populations_simplifieddispersal_buffer 0.956
## defined_populations_simplifiedeco_biogeo_proxies -0.455
## defined_populations_simplifiedgenetic_clusters 2.100
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 2.312
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 2.056
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.236
## defined_populations_simplifiedgeographic_boundaries management_units 1.821
## defined_populations_simplifiedmanagement_units -0.162
## defined_populations_simplifiedother 1.637
## defined_populations_simplifiedother_combinations 1.807
## Pr(>|z|)
## (Intercept) 2.3e-06
## defined_populations_simplifieddispersal_buffer 0.3390
## defined_populations_simplifiedeco_biogeo_proxies 0.6494
## defined_populations_simplifiedgenetic_clusters 0.0357
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.0208
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.0398
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.8136
## defined_populations_simplifiedgeographic_boundaries management_units 0.0685
## defined_populations_simplifiedmanagement_units 0.8710
## defined_populations_simplifiedother 0.1017
## defined_populations_simplifiedother_combinations 0.0707
##
## (Intercept) ***
## defined_populations_simplifieddispersal_buffer
## defined_populations_simplifiedeco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters *
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies *
## defined_populations_simplifiedgenetic_clusters geographic_boundaries *
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies
## defined_populations_simplifiedgeographic_boundaries management_units .
## defined_populations_simplifiedmanagement_units
## defined_populations_simplifiedother
## defined_populations_simplifiedother_combinations .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(A) Is there a relationship between number of maintained populations
and Indicator2, overall, and/or with some methods?
Scatter plot of indicator2 vs extant pops
p4<- indicators_full %>%
# filter outliers with too many pops and missing data
filter(n_extant_populations<500) %>%
filter(!is.na(indicator2)) %>%
filter(!is.na(n_extant_populations)) %>%
filter(species_range !="unknown") %>%
# plot
ggplot(aes(x=n_extant_populations, y=indicator2, color=defined_populations_nicenames)) +
geom_point() +
theme_light() +
scale_color_manual(values=simplifiedmethods_colors,
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
theme(legend.position = "none") +
ylab("Proportion of maintained populations") +
xlab("Number of maintained populations") +
theme(text = element_text(size = 13))
p4

p4.1<- indicators_full %>%
# filter outliers with too many pops and missing data
filter(n_extant_populations<500) %>%
filter(!is.na(indicator2)) %>%
filter(!is.na(n_extant_populations)) %>%
filter(species_range !="unknown") %>%
# plot
ggplot(aes(x=n_extant_populations, y=indicator2, color=species_range)) +
geom_point() +
theme_light() +
theme(legend.position = "none") +
ylab("Proportion of maintained populations") +
xlab("Number of maintained populations") +
theme(text = element_text(size = 13))
p4.1

Prepare data for model (remove outliers and NA in desired variable)
and check n:
# remove missing data
data_for_model<-indicators_full %>%
filter(!is.na(indicator2)) %>%
filter(!is.na(n_extant_populations)) %>%
filter(n_extant_populations<500) # doesn't make a difference in the test below, but useful for plots
# check number of methods
length(unique(data_for_model$defined_populations_simplified))
## [1] 11
# check n per method
table(data_for_model$defined_populations_simplified)
##
## dispersal_buffer
## 78
## eco_biogeo_proxies
## 32
## genetic_clusters
## 51
## genetic_clusters eco_biogeo_proxies
## 18
## genetic_clusters geographic_boundaries
## 41
## geographic_boundaries
## 176
## geographic_boundaries eco_biogeo_proxies
## 68
## geographic_boundaries management_units
## 17
## management_units
## 23
## other
## 9
## other_combinations
## 78
# total n
nrow(data_for_model)
## [1] 591
# re-level to use geographic boundaries as reference category for the analysis
data_for_model$defined_populations_simplified<-relevel(as.factor(data_for_model$defined_populations_simplified),
ref="geographic_boundaries")
Run model:
# run model
m4 <- glmmTMB(indicator2 ~ defined_populations_simplified + n_extant_populations + defined_populations_simplified*n_extant_populations + (1|country_assessment), family = "ordbeta", data = data_for_model)
## Warning in (function (start, objective, gradient = NULL, hessian = NULL, : NA/
## NaN function evaluation
Summary:
summary(m4)
## Family: ordbeta ( logit )
## Formula:
## indicator2 ~ defined_populations_simplified + n_extant_populations +
## defined_populations_simplified * n_extant_populations + (1 |
## country_assessment)
## Data: data_for_model
##
## AIC BIC logLik deviance df.resid
## 715.2 829.1 -331.6 663.2 565
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.2535 0.5035
## Number of obs: 591, groups: country_assessment, 9
##
## Dispersion parameter for ordbeta family (): 4.65
##
## Conditional model:
## Estimate
## (Intercept) 0.4860970
## defined_populations_simplifieddispersal_buffer 0.2041118
## defined_populations_simplifiedeco_biogeo_proxies 0.0185823
## defined_populations_simplifiedgenetic_clusters 0.4921698
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 2.0181250
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.1553793
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.1548329
## defined_populations_simplifiedgeographic_boundaries management_units 0.0756674
## defined_populations_simplifiedmanagement_units 0.3651752
## defined_populations_simplifiedother -1.4951385
## defined_populations_simplifiedother_combinations 0.2931036
## n_extant_populations 0.0033388
## defined_populations_simplifieddispersal_buffer:n_extant_populations 0.0062130
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations -0.0003238
## defined_populations_simplifiedgenetic_clusters:n_extant_populations 0.0195600
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations -0.1001861
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations -0.0054253
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations -0.0059498
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations 0.0509407
## defined_populations_simplifiedmanagement_units:n_extant_populations -0.0307312
## defined_populations_simplifiedother:n_extant_populations 0.4659543
## defined_populations_simplifiedother_combinations:n_extant_populations 0.0076828
## Std. Error
## (Intercept) 0.2076564
## defined_populations_simplifieddispersal_buffer 0.2464999
## defined_populations_simplifiedeco_biogeo_proxies 0.2362313
## defined_populations_simplifiedgenetic_clusters 0.3798044
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.6560356
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.2306203
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.2493244
## defined_populations_simplifiedgeographic_boundaries management_units 0.4242821
## defined_populations_simplifiedmanagement_units 0.3322970
## defined_populations_simplifiedother 0.9802928
## defined_populations_simplifiedother_combinations 0.1898614
## n_extant_populations 0.0016531
## defined_populations_simplifieddispersal_buffer:n_extant_populations 0.0072949
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations 0.0030501
## defined_populations_simplifiedgenetic_clusters:n_extant_populations 0.0612603
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations 0.0330297
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations 0.0024952
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations 0.0028486
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations 0.0528543
## defined_populations_simplifiedmanagement_units:n_extant_populations 0.0149129
## defined_populations_simplifiedother:n_extant_populations 0.2932465
## defined_populations_simplifiedother_combinations:n_extant_populations 0.0050420
## z value
## (Intercept) 2.341
## defined_populations_simplifieddispersal_buffer 0.828
## defined_populations_simplifiedeco_biogeo_proxies 0.079
## defined_populations_simplifiedgenetic_clusters 1.296
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 3.076
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.674
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.621
## defined_populations_simplifiedgeographic_boundaries management_units 0.178
## defined_populations_simplifiedmanagement_units 1.099
## defined_populations_simplifiedother -1.525
## defined_populations_simplifiedother_combinations 1.544
## n_extant_populations 2.020
## defined_populations_simplifieddispersal_buffer:n_extant_populations 0.852
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations -0.106
## defined_populations_simplifiedgenetic_clusters:n_extant_populations 0.319
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations -3.033
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations -2.174
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations -2.089
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations 0.964
## defined_populations_simplifiedmanagement_units:n_extant_populations -2.061
## defined_populations_simplifiedother:n_extant_populations 1.589
## defined_populations_simplifiedother_combinations:n_extant_populations 1.524
## Pr(>|z|)
## (Intercept) 0.01924
## defined_populations_simplifieddispersal_buffer 0.40765
## defined_populations_simplifiedeco_biogeo_proxies 0.93730
## defined_populations_simplifiedgenetic_clusters 0.19503
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.00210
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.50047
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.53459
## defined_populations_simplifiedgeographic_boundaries management_units 0.85845
## defined_populations_simplifiedmanagement_units 0.27179
## defined_populations_simplifiedother 0.12721
## defined_populations_simplifiedother_combinations 0.12264
## n_extant_populations 0.04341
## defined_populations_simplifieddispersal_buffer:n_extant_populations 0.39438
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations 0.91545
## defined_populations_simplifiedgenetic_clusters:n_extant_populations 0.74950
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations 0.00242
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations 0.02968
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations 0.03674
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations 0.33515
## defined_populations_simplifiedmanagement_units:n_extant_populations 0.03933
## defined_populations_simplifiedother:n_extant_populations 0.11207
## defined_populations_simplifiedother_combinations:n_extant_populations 0.12757
##
## (Intercept) *
## defined_populations_simplifieddispersal_buffer
## defined_populations_simplifiedeco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies **
## defined_populations_simplifiedgenetic_clusters geographic_boundaries
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies
## defined_populations_simplifiedgeographic_boundaries management_units
## defined_populations_simplifiedmanagement_units
## defined_populations_simplifiedother
## defined_populations_simplifiedother_combinations
## n_extant_populations *
## defined_populations_simplifieddispersal_buffer:n_extant_populations
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations
## defined_populations_simplifiedgenetic_clusters:n_extant_populations
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations **
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations *
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations *
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations
## defined_populations_simplifiedmanagement_units:n_extant_populations *
## defined_populations_simplifiedother:n_extant_populations
## defined_populations_simplifiedother_combinations:n_extant_populations
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
We run a similar model than above, but limited to relationship of
interest:
m4.1 <- glmmTMB(indicator2 ~ n_extant_populations + (1|country_assessment), family = "ordbeta", data= data_for_model)
Summary:
summary(m4.1)
## Family: ordbeta ( logit )
## Formula: indicator2 ~ n_extant_populations + (1 | country_assessment)
## Data: data_for_model
##
## AIC BIC logLik deviance df.resid
## 725.9 752.2 -356.9 713.9 585
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.2504 0.5004
## Number of obs: 591, groups: country_assessment, 9
##
## Dispersion parameter for ordbeta family (): 4
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.6994798 0.1846795 3.788 0.000152 ***
## n_extant_populations 0.0007942 0.0010104 0.786 0.431883
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Because “what’s a population and how do you define them?” is such an
important question, we can also test the effect of methods alone. First,
subset the data to only those taxa where a single method was used:
ind2_single_methods<-indicators_full %>%
filter(!is.na(indicator2)) %>%
filter(n_extant_populations<500) %>% # doesn't make a difference in the test below, but useful for
filter(defined_populations_simplified=="genetic_clusters" |
defined_populations_simplified=="geographic_boundaries" |
defined_populations_simplified=="eco_biogeo_proxies" |
defined_populations_simplified=="management_units" |
defined_populations_simplified=="dispersal_buffer")
# check number of methods
length(unique(ind2_single_methods$defined_populations_simplified))
## [1] 5
# check n by method
table(ind2_single_methods$defined_populations_simplified)
##
## dispersal_buffer eco_biogeo_proxies genetic_clusters
## 78 32 51
## geographic_boundaries management_units
## 176 23
# check n total
nrow(ind2_single_methods)
## [1] 360
# re-level to use geographic boundaries as reference category for the analysis
ind2_single_methods$defined_populations_simplified<-relevel(as.factor(ind2_single_methods$defined_populations_simplified),
ref="geographic_boundaries")
Run model:
# run model
m4.2 <- glm(ind2_single_methods$indicator2 ~ ind2_single_methods$n_extant_populations +
ind2_single_methods$defined_populations_simplified +
ind2_single_methods$n_extant_populations*ind2_single_methods$defined_populations_simplified, family = "quasibinomial")
Summary:
summary(m4.2)
##
## Call:
## glm(formula = ind2_single_methods$indicator2 ~ ind2_single_methods$n_extant_populations +
## ind2_single_methods$defined_populations_simplified + ind2_single_methods$n_extant_populations *
## ind2_single_methods$defined_populations_simplified, family = "quasibinomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0141 -0.3358 0.3443 0.6686 0.7987
##
## Coefficients:
## Estimate
## (Intercept) 1.380249
## ind2_single_methods$n_extant_populations 0.001082
## ind2_single_methods$defined_populations_simplifieddispersal_buffer -0.469445
## ind2_single_methods$defined_populations_simplifiedeco_biogeo_proxies 0.058023
## ind2_single_methods$defined_populations_simplifiedgenetic_clusters 1.478090
## ind2_single_methods$defined_populations_simplifiedmanagement_units 0.507064
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifieddispersal_buffer 0.067182
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedeco_biogeo_proxies 0.002354
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedgenetic_clusters -0.063257
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedmanagement_units -0.044946
## Std. Error
## (Intercept) 0.153492
## ind2_single_methods$n_extant_populations 0.003369
## ind2_single_methods$defined_populations_simplifieddispersal_buffer 0.329594
## ind2_single_methods$defined_populations_simplifiedeco_biogeo_proxies 0.403533
## ind2_single_methods$defined_populations_simplifiedgenetic_clusters 0.611069
## ind2_single_methods$defined_populations_simplifiedmanagement_units 0.537693
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifieddispersal_buffer 0.043134
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedeco_biogeo_proxies 0.007624
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedgenetic_clusters 0.125555
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedmanagement_units 0.024832
## t value
## (Intercept) 8.992
## ind2_single_methods$n_extant_populations 0.321
## ind2_single_methods$defined_populations_simplifieddispersal_buffer -1.424
## ind2_single_methods$defined_populations_simplifiedeco_biogeo_proxies 0.144
## ind2_single_methods$defined_populations_simplifiedgenetic_clusters 2.419
## ind2_single_methods$defined_populations_simplifiedmanagement_units 0.943
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifieddispersal_buffer 1.558
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedeco_biogeo_proxies 0.309
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedgenetic_clusters -0.504
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedmanagement_units -1.810
## Pr(>|t|)
## (Intercept) <2e-16
## ind2_single_methods$n_extant_populations 0.7483
## ind2_single_methods$defined_populations_simplifieddispersal_buffer 0.1552
## ind2_single_methods$defined_populations_simplifiedeco_biogeo_proxies 0.8858
## ind2_single_methods$defined_populations_simplifiedgenetic_clusters 0.0161
## ind2_single_methods$defined_populations_simplifiedmanagement_units 0.3463
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifieddispersal_buffer 0.1202
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedeco_biogeo_proxies 0.7577
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedgenetic_clusters 0.6147
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedmanagement_units 0.0712
##
## (Intercept) ***
## ind2_single_methods$n_extant_populations
## ind2_single_methods$defined_populations_simplifieddispersal_buffer
## ind2_single_methods$defined_populations_simplifiedeco_biogeo_proxies
## ind2_single_methods$defined_populations_simplifiedgenetic_clusters *
## ind2_single_methods$defined_populations_simplifiedmanagement_units
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifieddispersal_buffer
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedeco_biogeo_proxies
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedgenetic_clusters
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedmanagement_units .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasibinomial family taken to be 0.5367873)
##
## Null deviance: 161.55 on 359 degrees of freedom
## Residual deviance: 150.03 on 350 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 6
(B) Is there a relationship between number of maintained populations
and indicator1, overall, and/or with some methods?
Scatter plot of indicator1 vs extant pops
p5<- indicators_full %>%
# filter outliers with too many pops and missing data
filter(n_extant_populations<500) %>%
filter(!is.na(indicator1)) %>%
filter(!is.na(n_extant_populations)) %>%
filter(species_range !="unknown") %>%
# plot
ggplot(aes(x=n_extant_populations, y=indicator1, color=defined_populations_nicenames)) +
geom_point() +
theme_light() +
scale_color_manual(values=simplifiedmethods_colors,
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
theme(legend.position = "none") +
ylab("Proportion of populations with Ne>500") +
xlab("Number of maintained populations") +
theme(text = element_text(size = 13))
p5

## Coloring by range
p5.1<- indicators_full %>%
# filter outliers with too many pops and missing data
filter(n_extant_populations<500) %>%
filter(!is.na(indicator1)) %>%
filter(!is.na(n_extant_populations)) %>%
filter(species_range !="unknown") %>%
# plot
ggplot(aes(x=n_extant_populations, y=indicator1, color=species_range)) +
geom_point() +
theme_light() +
theme(legend.position = "none") +
ylab("Proportion of populations with Ne>500") +
xlab("Number of maintained populations") +
theme(text = element_text(size = 13))
p5.1

Prepare data for model (remove outliers and NA in desired variable)
and check n:
# remove missing data
data_for_model<-indicators_full %>%
filter(!is.na(indicator1)) %>%
filter(!is.na(n_extant_populations)) %>%
filter(n_extant_populations<500) # doesn't make a difference in the test below, but useful for plots
# check number of methods
length(unique(data_for_model$defined_populations_simplified))
## [1] 11
# check n per method
table(data_for_model$defined_populations_simplified)
##
## dispersal_buffer
## 138
## eco_biogeo_proxies
## 16
## genetic_clusters
## 58
## genetic_clusters eco_biogeo_proxies
## 8
## genetic_clusters geographic_boundaries
## 41
## geographic_boundaries
## 156
## geographic_boundaries eco_biogeo_proxies
## 54
## geographic_boundaries management_units
## 20
## management_units
## 13
## other
## 6
## other_combinations
## 66
# total n
nrow(data_for_model)
## [1] 576
# re-level to use geographic boundaries as reference category for the analysis
data_for_model$defined_populations_simplified<-relevel(as.factor(data_for_model$defined_populations_simplified),
ref="geographic_boundaries")
Run model:
# run model
m5 <- glmmTMB(indicator1 ~ defined_populations_simplified + n_extant_populations + defined_populations_simplified*n_extant_populations + (1|country_assessment), family = "ordbeta", data = data_for_model)
## Warning in (function (start, objective, gradient = NULL, hessian = NULL, : NA/
## NaN function evaluation
Summary:
summary(m5)
## Family: ordbeta ( logit )
## Formula:
## indicator1 ~ defined_populations_simplified + n_extant_populations +
## defined_populations_simplified * n_extant_populations + (1 |
## country_assessment)
## Data: data_for_model
##
## AIC BIC logLik deviance df.resid
## 1061.9 1175.1 -504.9 1009.9 550
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.0845 0.2907
## Number of obs: 576, groups: country_assessment, 9
##
## Dispersion parameter for ordbeta family (): 4.66
##
## Conditional model:
## Estimate
## (Intercept) -0.814369
## defined_populations_simplifieddispersal_buffer 0.311050
## defined_populations_simplifiedeco_biogeo_proxies 0.129366
## defined_populations_simplifiedgenetic_clusters 1.233443
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 1.014943
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.654810
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.714061
## defined_populations_simplifiedgeographic_boundaries management_units 0.947628
## defined_populations_simplifiedmanagement_units 0.369816
## defined_populations_simplifiedother -0.867823
## defined_populations_simplifiedother_combinations 0.429108
## n_extant_populations -0.002280
## defined_populations_simplifieddispersal_buffer:n_extant_populations -0.003397
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations -0.012999
## defined_populations_simplifiedgenetic_clusters:n_extant_populations -0.177531
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations 0.010572
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations -0.015191
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations -0.118011
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations -0.029682
## defined_populations_simplifiedmanagement_units:n_extant_populations -0.055449
## defined_populations_simplifiedother:n_extant_populations 0.718184
## defined_populations_simplifiedother_combinations:n_extant_populations -0.003197
## Std. Error
## (Intercept) 0.188367
## defined_populations_simplifieddispersal_buffer 0.283221
## defined_populations_simplifiedeco_biogeo_proxies 0.409036
## defined_populations_simplifiedgenetic_clusters 0.360680
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.627771
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.307281
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.495225
## defined_populations_simplifiedgeographic_boundaries management_units 0.416328
## defined_populations_simplifiedmanagement_units 0.609911
## defined_populations_simplifiedother 1.543631
## defined_populations_simplifiedother_combinations 0.208262
## n_extant_populations 0.003869
## defined_populations_simplifieddispersal_buffer:n_extant_populations 0.005200
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations 0.011637
## defined_populations_simplifiedgenetic_clusters:n_extant_populations 0.064257
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations 0.084028
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations 0.022612
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations 0.058478
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations 0.029395
## defined_populations_simplifiedmanagement_units:n_extant_populations 0.068157
## defined_populations_simplifiedother:n_extant_populations 0.768524
## defined_populations_simplifiedother_combinations:n_extant_populations 0.005144
## z value
## (Intercept) -4.323
## defined_populations_simplifieddispersal_buffer 1.098
## defined_populations_simplifiedeco_biogeo_proxies 0.316
## defined_populations_simplifiedgenetic_clusters 3.420
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 1.617
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 2.131
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 1.442
## defined_populations_simplifiedgeographic_boundaries management_units 2.276
## defined_populations_simplifiedmanagement_units 0.606
## defined_populations_simplifiedother -0.562
## defined_populations_simplifiedother_combinations 2.060
## n_extant_populations -0.589
## defined_populations_simplifieddispersal_buffer:n_extant_populations -0.653
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations -1.117
## defined_populations_simplifiedgenetic_clusters:n_extant_populations -2.763
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations 0.126
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations -0.672
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations -2.018
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations -1.010
## defined_populations_simplifiedmanagement_units:n_extant_populations -0.814
## defined_populations_simplifiedother:n_extant_populations 0.934
## defined_populations_simplifiedother_combinations:n_extant_populations -0.622
## Pr(>|z|)
## (Intercept) 1.54e-05
## defined_populations_simplifieddispersal_buffer 0.272091
## defined_populations_simplifiedeco_biogeo_proxies 0.751798
## defined_populations_simplifiedgenetic_clusters 0.000627
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.105934
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.033091
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.149333
## defined_populations_simplifiedgeographic_boundaries management_units 0.022837
## defined_populations_simplifiedmanagement_units 0.544286
## defined_populations_simplifiedother 0.573983
## defined_populations_simplifiedother_combinations 0.039358
## n_extant_populations 0.555537
## defined_populations_simplifieddispersal_buffer:n_extant_populations 0.513601
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations 0.263984
## defined_populations_simplifiedgenetic_clusters:n_extant_populations 0.005730
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations 0.899877
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations 0.501707
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations 0.043589
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations 0.312619
## defined_populations_simplifiedmanagement_units:n_extant_populations 0.415906
## defined_populations_simplifiedother:n_extant_populations 0.350047
## defined_populations_simplifiedother_combinations:n_extant_populations 0.534218
##
## (Intercept) ***
## defined_populations_simplifieddispersal_buffer
## defined_populations_simplifiedeco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters ***
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters geographic_boundaries *
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies
## defined_populations_simplifiedgeographic_boundaries management_units *
## defined_populations_simplifiedmanagement_units
## defined_populations_simplifiedother
## defined_populations_simplifiedother_combinations *
## n_extant_populations
## defined_populations_simplifieddispersal_buffer:n_extant_populations
## defined_populations_simplifiedeco_biogeo_proxies:n_extant_populations
## defined_populations_simplifiedgenetic_clusters:n_extant_populations **
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:n_extant_populations
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:n_extant_populations
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:n_extant_populations *
## defined_populations_simplifiedgeographic_boundaries management_units:n_extant_populations
## defined_populations_simplifiedmanagement_units:n_extant_populations
## defined_populations_simplifiedother:n_extant_populations
## defined_populations_simplifiedother_combinations:n_extant_populations
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
We run a similar model than above, but limited to relationship of
interest:
# m5.1 pending to update when 4.1 is checked ok
Summary:
# summary(m5.1)
Because “what’s a population and how do you define them?” is such an
important question, we can also test the effect of methods alone. First,
subset the data to only those taxa where a single method was used:
ind1_single_methods<-indicators_full %>%
filter(!is.na(indicator1)) %>%
filter(n_extant_populations<500) %>% # doesn't make a difference in the test below, but useful for
filter(defined_populations_simplified=="genetic_clusters" |
defined_populations_simplified=="geographic_boundaries" |
defined_populations_simplified=="eco_biogeo_proxies" |
defined_populations_simplified=="management_units" |
defined_populations_simplified=="dispersal_buffer")
# check number of methods
length(unique(ind1_single_methods$defined_populations_simplified))
## [1] 5
# check n by method
table(ind1_single_methods$defined_populations_simplified)
##
## dispersal_buffer eco_biogeo_proxies genetic_clusters
## 138 16 58
## geographic_boundaries management_units
## 156 13
# check n total
nrow(ind1_single_methods)
## [1] 381
# re-level to use geographic boundaries as reference category for the analysis
ind1_single_methods$defined_populations_simplified<-relevel(as.factor(ind1_single_methods$defined_populations_simplified),
ref="geographic_boundaries")
Run model:
# run model
# m5.2 pending
Summary:
# summary(m5.2) pending
Main Figure: Single plot 3 panels box plots for the effect of method
on: number of populations, proportion of maintained populations
(indicator 2) and Proportion of populations with Ne>500 (indicator
1).
Plot in three panels.
##### plot for Proportion of maintained populations (indicator 2) only with n in axis labels
# sample size
sample_size <- indicators_full %>%
filter(!is.na(indicator2)) %>%
filter(n_extant_populations<500) %>%
group_by(defined_populations_nicenames) %>% summarize(num=n())
# custom axis
## new dataframe
df<-indicators_full %>%
filter(n_extant_populations<500) %>%
filter(!is.na(indicator2)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = as.factor(paste0(defined_populations_nicenames, " (n= ", num, ")")))
## Joining, by = "defined_populations_nicenames"
p2.1<- df %>%
filter(n_extant_populations<500) %>%
ggplot(aes(x=myaxis, y=indicator2, color=defined_populations_nicenames,
fill=defined_populations_nicenames)) +
geom_boxplot() + xlab("") + ylab("Proportion of maintained populations") +
geom_jitter(size=.4, width = 0.1, color="black") +
coord_flip() +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
plot.margin = unit(c(0.2, 0.2, 0.2, 0.2), "cm")) + # this is used to decrease the space between plots)
scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_color_manual(values=simplifiedmethods_colors,
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_x_discrete(limits=rev,
labels= rev(sub(".*(\\(n= \\d+\\))", "\\1", levels(df$myaxis)))) + # extract "(n = number)") and show them in reverse order
theme(text = element_text(size = 13))
##### plot for Proportion populations Ne>500 (indicator 1) only with n in axis labels
# Prepare data for plot with nice labels:
# sample size of TOTAL populations
sample_size <- indicators_full %>%
filter(!is.na(indicator1)) %>%
filter(n_extant_populations<500) %>%
group_by(defined_populations_nicenames) %>% summarize(num=n())
# custom axis
## new dataframe
df<-indicators_full %>%
filter(n_extant_populations<500) %>%
filter(!is.na(indicator1)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = as.factor(paste0(defined_populations_nicenames, " (n= ", num, ")")))
## Joining, by = "defined_populations_nicenames"
## plot
p3.1<- df %>%
ggplot(aes(x=myaxis, y=indicator1, color=defined_populations_nicenames,
fill=defined_populations_nicenames)) +
geom_boxplot() + xlab("") + ylab("Proportion of populations with Ne>500") +
geom_jitter(size=.4, width = 0.1, color="black") +
coord_flip() +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
plot.margin = unit(c(0.2, 0.2, 0.2, 0.2), "cm")) + # this is used to decrease the space between plots)
scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_color_manual(values=simplifiedmethods_colors,
breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
scale_x_discrete(limits=rev,
labels= rev(sub(".*(\\(n= \\d+\\))", "\\1", levels(df$myaxis)))) + # extract "(n = number)") and show them in reverse order
theme(text = element_text(size = 13))
## Plot 3 panels
plot_grid(p1, p2.1, p3.1, ncol=3, rel_widths = c(1.9,1,1), align = "h", labels=c("a)", "b)", "c)"))

Effect of distribution range (restricted vs wide) on the
indicators
All the following plots and analyses consider the average of
multiassessed species (variable _mean), so that they are
shown only once.
To have nicer looking plots, change “wide_ranging” for “wide
ranging”:
indicators_averaged_one$species_range<-gsub("wide_ranging", "wide ranging", indicators_averaged_one$species_range)
Indicator 1 (Ne>5000)
Plot Indicator 1 by type of range in the entire dataset. Filtering NA
in species range:
# get sample size by desired category
sample_size <- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
filter(!is.na(species_range)) %>%
group_by(species_range) %>% summarize(num=n())
# plot
p1<-indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
filter(!is.na(species_range)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(species_range, " (n= ", num, ")")) %>%
# plot
ggplot(aes(x=myaxis, y=indicator1_mean , fill=species_range)) +
geom_violin(width=1, linewidth = 0) +
geom_jitter(size=.5, width = 0.1) +
xlab("") + ylab("Proportion of populations with Ne>500") +
coord_flip() +
scale_fill_manual(breaks=c("wide ranging", "restricted", "unknown"),
labels=c("wide ranging", "restricted", "unknown"),
values=c("#00BFC4", "#F8766D", "grey80")) +
theme_light() +
theme(panel.border = element_blank(), legend.position="none", text= element_text(size=20))
## Joining, by = "species_range"
p1

Supplementary Figure: Plot Ne Indicator by country and type of
range. Remove “unknown” and NA for better visualization.
### Duplicate dataframe to have a column with "all data" for faceting
df<-CreateAllFacet(indicators_averaged_one, "country_assessment")
# order with "all" as last
df$facet <- factor(df$facet, levels=c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US", "all"))
## plot
df %>%
# filter out "unknown" range
filter(species_range !="unknown") %>%
# plot
ggplot(aes(x=species_range, y=indicator1_mean , fill=species_range)) +
geom_violin(width=1, linewidth = 0) +
geom_jitter(size=.5, width = 0.1) +
xlab("") + ylab("Proportion of populations with Ne>500") +
coord_flip() +
scale_x_discrete(breaks=c("wide ranging", "restricted"),
labels=c("wide ranging", "restricted")) +
theme_light() +
theme(panel.border = element_blank(), legend.position="none", text= element_text(size=15)) +
facet_wrap(~facet, ncol = 5) +
theme(panel.spacing = unit(1.5, "lines"))
## Warning: Removed 662 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 662 rows containing missing values (`geom_point()`).

Test the effect of range type on Ne>500 indicator. Does the
indicator vary between wide randing vs restricted distribution species?
(keep only those categories and remove unknwon due to small sampling
size)
## Remove unknown
data<- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
filter(species_range !="unknown")
# summary of indicator
summary(data$indicator1_mean)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.2728 0.5000 1.0000
## run model
m1 <- glmmTMB(indicator1_mean ~ defined_populations_simplified + species_range + defined_populations_simplified*species_range + (1|country_assessment), family = "ordbeta", data = data)
# summary results
summary(m1)
## Family: ordbeta ( logit )
## Formula:
## indicator1_mean ~ defined_populations_simplified + species_range +
## defined_populations_simplified * species_range + (1 | country_assessment)
## Data: data
##
## AIC BIC logLik deviance df.resid
## 979.1 1090.7 -463.5 927.1 515
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.1523 0.3902
## Number of obs: 541, groups: country_assessment, 9
##
## Dispersion parameter for ordbeta family (): 4.32
##
## Conditional model:
## Estimate
## (Intercept) -7.257e-01
## defined_populations_simplifiedeco_biogeo_proxies -2.312e-01
## defined_populations_simplifiedgenetic_clusters -1.949e-01
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies -1.747e+01
## defined_populations_simplifiedgenetic_clusters geographic_boundaries -4.693e-01
## defined_populations_simplifiedgeographic_boundaries -3.884e-01
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -8.363e-01
## defined_populations_simplifiedgeographic_boundaries management_units 1.465e-01
## defined_populations_simplifiedmanagement_units -2.091e+01
## defined_populations_simplifiedother 1.430e-01
## defined_populations_simplifiedother_combinations 3.159e-01
## species_rangewide ranging 2.723e-01
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging -6.640e-01
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging 1.287e+00
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging 1.846e+01
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging 1.056e+00
## defined_populations_simplifiedgeographic_boundaries:species_rangewide ranging 3.427e-01
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging 8.178e-01
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging 9.721e-03
## defined_populations_simplifiedmanagement_units:species_rangewide ranging 2.092e+01
## defined_populations_simplifiedother:species_rangewide ranging 2.512e+01
## defined_populations_simplifiedother_combinations:species_rangewide ranging -4.529e-01
## Std. Error
## (Intercept) 3.250e-01
## defined_populations_simplifiedeco_biogeo_proxies 5.281e-01
## defined_populations_simplifiedgenetic_clusters 4.532e-01
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 4.756e+03
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 4.947e-01
## defined_populations_simplifiedgeographic_boundaries 3.598e-01
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 5.654e-01
## defined_populations_simplifiedgeographic_boundaries management_units 5.233e-01
## defined_populations_simplifiedmanagement_units 1.620e+04
## defined_populations_simplifiedother 7.310e-01
## defined_populations_simplifiedother_combinations 3.822e-01
## species_rangewide ranging 2.733e-01
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging 6.918e-01
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging 5.104e-01
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging 4.756e+03
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging 5.350e-01
## defined_populations_simplifiedgeographic_boundaries:species_rangewide ranging 3.791e-01
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging 7.177e-01
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging 6.196e-01
## defined_populations_simplifiedmanagement_units:species_rangewide ranging 1.620e+04
## defined_populations_simplifiedother:species_rangewide ranging 1.202e+05
## defined_populations_simplifiedother_combinations:species_rangewide ranging 4.091e-01
## z value
## (Intercept) -2.233
## defined_populations_simplifiedeco_biogeo_proxies -0.438
## defined_populations_simplifiedgenetic_clusters -0.430
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies -0.004
## defined_populations_simplifiedgenetic_clusters geographic_boundaries -0.949
## defined_populations_simplifiedgeographic_boundaries -1.079
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -1.479
## defined_populations_simplifiedgeographic_boundaries management_units 0.280
## defined_populations_simplifiedmanagement_units -0.001
## defined_populations_simplifiedother 0.196
## defined_populations_simplifiedother_combinations 0.826
## species_rangewide ranging 0.996
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging -0.960
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging 2.521
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging 0.004
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging 1.974
## defined_populations_simplifiedgeographic_boundaries:species_rangewide ranging 0.904
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging 1.139
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging 0.016
## defined_populations_simplifiedmanagement_units:species_rangewide ranging 0.001
## defined_populations_simplifiedother:species_rangewide ranging 0.000
## defined_populations_simplifiedother_combinations:species_rangewide ranging -1.107
## Pr(>|z|)
## (Intercept) 0.0255
## defined_populations_simplifiedeco_biogeo_proxies 0.6615
## defined_populations_simplifiedgenetic_clusters 0.6672
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.9971
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.3428
## defined_populations_simplifiedgeographic_boundaries 0.2804
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.1391
## defined_populations_simplifiedgeographic_boundaries management_units 0.7796
## defined_populations_simplifiedmanagement_units 0.9990
## defined_populations_simplifiedother 0.8449
## defined_populations_simplifiedother_combinations 0.4086
## species_rangewide ranging 0.3191
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging 0.3371
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging 0.0117
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging 0.9969
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging 0.0484
## defined_populations_simplifiedgeographic_boundaries:species_rangewide ranging 0.3659
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging 0.2545
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging 0.9875
## defined_populations_simplifiedmanagement_units:species_rangewide ranging 0.9990
## defined_populations_simplifiedother:species_rangewide ranging 0.9998
## defined_populations_simplifiedother_combinations:species_rangewide ranging 0.2682
##
## (Intercept) *
## defined_populations_simplifiedeco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters geographic_boundaries
## defined_populations_simplifiedgeographic_boundaries
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies
## defined_populations_simplifiedgeographic_boundaries management_units
## defined_populations_simplifiedmanagement_units
## defined_populations_simplifiedother
## defined_populations_simplifiedother_combinations
## species_rangewide ranging
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging *
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging *
## defined_populations_simplifiedgeographic_boundaries:species_rangewide ranging
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging
## defined_populations_simplifiedmanagement_units:species_rangewide ranging
## defined_populations_simplifiedother:species_rangewide ranging
## defined_populations_simplifiedother_combinations:species_rangewide ranging
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## + country
m1.1 <- glmmTMB(indicator1_mean ~ defined_populations_simplified + species_range + (1|country_assessment), family = "ordbeta", data = data)
# summary results
summary(m1.1)
## Family: ordbeta ( logit )
## Formula:
## indicator1_mean ~ defined_populations_simplified + species_range +
## (1 | country_assessment)
## Data: data
##
## AIC BIC logLik deviance df.resid
## 988.9 1057.6 -478.4 956.9 525
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.1173 0.3425
## Number of obs: 541, groups: country_assessment, 9
##
## Dispersion parameter for ordbeta family (): 3.91
##
## Conditional model:
## Estimate
## (Intercept) -0.93634
## defined_populations_simplifiedeco_biogeo_proxies -0.38621
## defined_populations_simplifiedgenetic_clusters 0.54328
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.65558
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.21641
## defined_populations_simplifiedgeographic_boundaries -0.13361
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.37965
## defined_populations_simplifiedgeographic_boundaries management_units 0.21765
## defined_populations_simplifiedmanagement_units -0.30983
## defined_populations_simplifiedother 0.84092
## defined_populations_simplifiedother_combinations 0.09018
## species_rangewide ranging 0.58441
## Std. Error
## (Intercept) 0.25892
## defined_populations_simplifiedeco_biogeo_proxies 0.42649
## defined_populations_simplifiedgenetic_clusters 0.31886
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.48559
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.32962
## defined_populations_simplifiedgeographic_boundaries 0.28367
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.44424
## defined_populations_simplifiedgeographic_boundaries management_units 0.39074
## defined_populations_simplifiedmanagement_units 0.49332
## defined_populations_simplifiedother 0.66622
## defined_populations_simplifiedother_combinations 0.24818
## species_rangewide ranging 0.13777
## z value
## (Intercept) -3.616
## defined_populations_simplifiedeco_biogeo_proxies -0.906
## defined_populations_simplifiedgenetic_clusters 1.704
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 1.350
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.657
## defined_populations_simplifiedgeographic_boundaries -0.471
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.855
## defined_populations_simplifiedgeographic_boundaries management_units 0.557
## defined_populations_simplifiedmanagement_units -0.628
## defined_populations_simplifiedother 1.262
## defined_populations_simplifiedother_combinations 0.363
## species_rangewide ranging 4.242
## Pr(>|z|)
## (Intercept) 0.000299
## defined_populations_simplifiedeco_biogeo_proxies 0.365171
## defined_populations_simplifiedgenetic_clusters 0.088420
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.176999
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.511473
## defined_populations_simplifiedgeographic_boundaries 0.637642
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.392764
## defined_populations_simplifiedgeographic_boundaries management_units 0.577523
## defined_populations_simplifiedmanagement_units 0.529967
## defined_populations_simplifiedother 0.206869
## defined_populations_simplifiedother_combinations 0.716344
## species_rangewide ranging 2.21e-05
##
## (Intercept) ***
## defined_populations_simplifiedeco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters .
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters geographic_boundaries
## defined_populations_simplifiedgeographic_boundaries
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies
## defined_populations_simplifiedgeographic_boundaries management_units
## defined_populations_simplifiedmanagement_units
## defined_populations_simplifiedother
## defined_populations_simplifiedother_combinations
## species_rangewide ranging ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Indicator 2 (mantained populations)
Plot Indicator 2 by type of range in the entire dataset. Filtering NA
in species range:
# get sample size by desired category
sample_size <- indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
filter(!is.na(species_range)) %>%
group_by(species_range) %>% summarize(num=n())
# plot
p2<-indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
filter(!is.na(species_range)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(species_range, " (n= ", num, ")")) %>%
# plot
ggplot(aes(x=myaxis, y=indicator2_mean , fill=species_range)) +
geom_violin(width=1, linewidth = 0) +
geom_jitter(size=.5, width = 0.1) +
xlab("") + ylab("Proportion of populations maintained") +
coord_flip() +
scale_fill_manual(breaks=c("wide ranging", "restricted", "unknown"),
labels=c("wide ranging", "restricted", "unknown"),
values=c("#00BFC4", "#F8766D", "grey80")) +
theme_light() +
theme(panel.border = element_blank(), legend.position="none", text= element_text(size=20))
## Joining, by = "species_range"
p2

Plot Indicator 2 by country and type of range. We remove NA and
unknown for better visualization.
### Duplicate dataframe to have a column with "all data" for faceting
df<-CreateAllFacet(indicators_averaged_one, "country_assessment")
# order with "all" as last
df$facet <- factor(df$facet, levels=c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US", "all"))
## plot
df %>%
# filter out "unknown" range
filter(species_range !="unknown") %>%
# plot
ggplot(aes(x=species_range, y=indicator2_mean , fill=species_range)) +
geom_violin(width=1, linewidth = 0) +
geom_jitter(size=.5, width = 0.1) +
xlab("") + ylab("Proportion of populations maintained") +
coord_flip() +
theme_light() +
theme(panel.border = element_blank(), legend.position="none", text= element_text(size=15)) +
facet_wrap(~facet, ncol = 5) +
theme(panel.spacing = unit(1.5, "lines"))
## Warning: Removed 688 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 688 rows containing missing values (`geom_point()`).

Test the effect of range type on the proportion of maitained
populations. Does the indicator vary between wide randing vs restricted
distribution species? Consider only wide randing and restricted
categores (ie remove unknown due to small sampling size)
## Remove unknown
data<- indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
filter(species_range !="unknown")
# summary of indicator
summary(data$indicator2_mean)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.6798 1.0000 0.8343 1.0000 1.0000
## run model
m2 <- glmmTMB(indicator2_mean ~ defined_populations_simplified + species_range + defined_populations_simplified*species_range + (1|country_assessment), family = "ordbeta", data = data)
# summary results
summary(m2)
## Family: ordbeta ( logit )
## Formula:
## indicator2_mean ~ defined_populations_simplified + species_range +
## defined_populations_simplified * species_range + (1 | country_assessment)
## Data: data
##
## AIC BIC logLik deviance df.resid
## 649.2 760.2 -298.6 597.2 502
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.2956 0.5436
## Number of obs: 528, groups: country_assessment, 9
##
## Dispersion parameter for ordbeta family (): 4.24
##
## Conditional model:
## Estimate
## (Intercept) 7.528e-01
## defined_populations_simplifiedeco_biogeo_proxies -3.626e-01
## defined_populations_simplifiedgenetic_clusters -5.844e-02
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies -4.750e-01
## defined_populations_simplifiedgenetic_clusters geographic_boundaries -3.911e-01
## defined_populations_simplifiedgeographic_boundaries -1.901e-01
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -1.625e-01
## defined_populations_simplifiedgeographic_boundaries management_units -1.273e-01
## defined_populations_simplifiedmanagement_units -1.397e-01
## defined_populations_simplifiedother -5.698e-01
## defined_populations_simplifiedother_combinations 9.913e-02
## species_rangewide ranging 1.755e-01
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging 3.938e-01
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging 4.194e-01
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging 2.013e+01
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging 3.691e-01
## defined_populations_simplifiedgeographic_boundaries:species_rangewide ranging 3.278e-01
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging -4.704e-01
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging 9.751e-01
## defined_populations_simplifiedmanagement_units:species_rangewide ranging -6.595e-01
## defined_populations_simplifiedother:species_rangewide ranging 1.872e+01
## defined_populations_simplifiedother_combinations:species_rangewide ranging 1.796e-01
## Std. Error
## (Intercept) 3.063e-01
## defined_populations_simplifiedeco_biogeo_proxies 4.103e-01
## defined_populations_simplifiedgenetic_clusters 4.763e-01
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 6.035e-01
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 3.696e-01
## defined_populations_simplifiedgeographic_boundaries 2.857e-01
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 3.822e-01
## defined_populations_simplifiedgeographic_boundaries management_units 4.564e-01
## defined_populations_simplifiedmanagement_units 4.793e-01
## defined_populations_simplifiedother 5.888e-01
## defined_populations_simplifiedother_combinations 3.471e-01
## species_rangewide ranging 2.655e-01
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging 4.969e-01
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging 5.494e-01
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging 1.104e+04
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging 4.808e-01
## defined_populations_simplifiedgeographic_boundaries:species_rangewide ranging 3.473e-01
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging 4.651e-01
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging 8.653e-01
## defined_populations_simplifiedmanagement_units:species_rangewide ranging 6.037e-01
## defined_populations_simplifiedother:species_rangewide ranging 6.790e+03
## defined_populations_simplifiedother_combinations:species_rangewide ranging 3.973e-01
## z value
## (Intercept) 2.457
## defined_populations_simplifiedeco_biogeo_proxies -0.884
## defined_populations_simplifiedgenetic_clusters -0.123
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies -0.787
## defined_populations_simplifiedgenetic_clusters geographic_boundaries -1.058
## defined_populations_simplifiedgeographic_boundaries -0.665
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.425
## defined_populations_simplifiedgeographic_boundaries management_units -0.279
## defined_populations_simplifiedmanagement_units -0.291
## defined_populations_simplifiedother -0.968
## defined_populations_simplifiedother_combinations 0.286
## species_rangewide ranging 0.661
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging 0.792
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging 0.763
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging 0.002
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging 0.768
## defined_populations_simplifiedgeographic_boundaries:species_rangewide ranging 0.944
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging -1.012
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging 1.127
## defined_populations_simplifiedmanagement_units:species_rangewide ranging -1.093
## defined_populations_simplifiedother:species_rangewide ranging 0.003
## defined_populations_simplifiedother_combinations:species_rangewide ranging 0.452
## Pr(>|z|)
## (Intercept) 0.014
## defined_populations_simplifiedeco_biogeo_proxies 0.377
## defined_populations_simplifiedgenetic_clusters 0.902
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.431
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.290
## defined_populations_simplifiedgeographic_boundaries 0.506
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.671
## defined_populations_simplifiedgeographic_boundaries management_units 0.780
## defined_populations_simplifiedmanagement_units 0.771
## defined_populations_simplifiedother 0.333
## defined_populations_simplifiedother_combinations 0.775
## species_rangewide ranging 0.509
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging 0.428
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging 0.445
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging 0.999
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging 0.443
## defined_populations_simplifiedgeographic_boundaries:species_rangewide ranging 0.345
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging 0.312
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging 0.260
## defined_populations_simplifiedmanagement_units:species_rangewide ranging 0.275
## defined_populations_simplifiedother:species_rangewide ranging 0.998
## defined_populations_simplifiedother_combinations:species_rangewide ranging 0.651
##
## (Intercept) *
## defined_populations_simplifiedeco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters geographic_boundaries
## defined_populations_simplifiedgeographic_boundaries
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies
## defined_populations_simplifiedgeographic_boundaries management_units
## defined_populations_simplifiedmanagement_units
## defined_populations_simplifiedother
## defined_populations_simplifiedother_combinations
## species_rangewide ranging
## defined_populations_simplifiedeco_biogeo_proxies:species_rangewide ranging
## defined_populations_simplifiedgenetic_clusters:species_rangewide ranging
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies:species_rangewide ranging
## defined_populations_simplifiedgenetic_clusters geographic_boundaries:species_rangewide ranging
## defined_populations_simplifiedgeographic_boundaries:species_rangewide ranging
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies:species_rangewide ranging
## defined_populations_simplifiedgeographic_boundaries management_units:species_rangewide ranging
## defined_populations_simplifiedmanagement_units:species_rangewide ranging
## defined_populations_simplifiedother:species_rangewide ranging
## defined_populations_simplifiedother_combinations:species_rangewide ranging
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## + country
m2.1 <- glmmTMB(indicator2_mean ~ defined_populations_simplified + species_range + (1|country_assessment), family = "ordbeta", data = data)
# summary results
summary(m2.1)
## Family: ordbeta ( logit )
## Formula:
## indicator2_mean ~ defined_populations_simplified + species_range +
## (1 | country_assessment)
## Data: data
##
## AIC BIC logLik deviance df.resid
## 644.1 712.4 -306.1 612.1 512
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## country_assessment (Intercept) 0.3057 0.5529
## Number of obs: 528, groups: country_assessment, 9
##
## Dispersion parameter for ordbeta family (): 4.04
##
## Conditional model:
## Estimate
## (Intercept) 0.67903
## defined_populations_simplifiedeco_biogeo_proxies -0.14730
## defined_populations_simplifiedgenetic_clusters 0.17173
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.08718
## defined_populations_simplifiedgenetic_clusters geographic_boundaries -0.22824
## defined_populations_simplifiedgeographic_boundaries -0.06515
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.30377
## defined_populations_simplifiedgeographic_boundaries management_units 0.17681
## defined_populations_simplifiedmanagement_units -0.44989
## defined_populations_simplifiedother -0.19553
## defined_populations_simplifiedother_combinations 0.19570
## species_rangewide ranging 0.35877
## Std. Error
## (Intercept) 0.28627
## defined_populations_simplifiedeco_biogeo_proxies 0.33588
## defined_populations_simplifiedgenetic_clusters 0.33655
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.56955
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.31637
## defined_populations_simplifiedgeographic_boundaries 0.25524
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.33660
## defined_populations_simplifiedgeographic_boundaries management_units 0.40696
## defined_populations_simplifiedmanagement_units 0.37702
## defined_populations_simplifiedother 0.56093
## defined_populations_simplifiedother_combinations 0.26427
## species_rangewide ranging 0.11743
## z value
## (Intercept) 2.372
## defined_populations_simplifiedeco_biogeo_proxies -0.439
## defined_populations_simplifiedgenetic_clusters 0.510
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.153
## defined_populations_simplifiedgenetic_clusters geographic_boundaries -0.721
## defined_populations_simplifiedgeographic_boundaries -0.255
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.902
## defined_populations_simplifiedgeographic_boundaries management_units 0.434
## defined_populations_simplifiedmanagement_units -1.193
## defined_populations_simplifiedother -0.349
## defined_populations_simplifiedother_combinations 0.741
## species_rangewide ranging 3.055
## Pr(>|z|)
## (Intercept) 0.01769
## defined_populations_simplifiedeco_biogeo_proxies 0.66098
## defined_populations_simplifiedgenetic_clusters 0.60987
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies 0.87834
## defined_populations_simplifiedgenetic_clusters geographic_boundaries 0.47065
## defined_populations_simplifiedgeographic_boundaries 0.79852
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies 0.36682
## defined_populations_simplifiedgeographic_boundaries management_units 0.66396
## defined_populations_simplifiedmanagement_units 0.23276
## defined_populations_simplifiedother 0.72741
## defined_populations_simplifiedother_combinations 0.45898
## species_rangewide ranging 0.00225
##
## (Intercept) *
## defined_populations_simplifiedeco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters
## defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies
## defined_populations_simplifiedgenetic_clusters geographic_boundaries
## defined_populations_simplifiedgeographic_boundaries
## defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies
## defined_populations_simplifiedgeographic_boundaries management_units
## defined_populations_simplifiedmanagement_units
## defined_populations_simplifiedother
## defined_populations_simplifiedother_combinations
## species_rangewide ranging **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Single plot PM and Ne indicators by range type
plot_grid(p1, p2, ncol=1, align = "v", labels=c("a)", "b)"))

Main Figure: Violin plots indicatrors by of range type coloring
points to show genetic clusters
For Ne indicator:
# add variable stating if genetic methods are used
indicators_averaged_one<- indicators_averaged_one %>%
mutate(genetic_to_define_pops = ifelse(grepl("genetic", defined_populations_simplified), 'genetic method', 'non genetic'))
# get sample size by desired category
sample_size <- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
filter(!is.na(species_range)) %>%
group_by(species_range) %>% summarize(num=n())
# plot
p1 <- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
filter(!is.na(species_range)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(species_range, " (n= ", num, ")")) %>%
# plot
ggplot(aes(x=myaxis, y=indicator1_mean)) +
geom_violin(width=1, linewidth = 0, fill="grey70") +
xlab("") + ylab("Proportion of populations with Ne>500") +
coord_flip() +
new_scale_color() + # to color the points without confusing ggplot
geom_jitter(size=1.2, width = 0.1, aes(color = genetic_to_define_pops)) +
scale_color_manual(values=c("red", "black")) +
theme_light() +
labs(color=NULL) + # hide legend title
theme(panel.border = element_blank(), legend.position="bottom", text= element_text(size=20))
## Joining, by = "species_range"
p1

For PM indicator:
# add variable stating if genetic methods are used
indicators_averaged_one<- indicators_averaged_one %>%
mutate(genetic_to_define_pops = ifelse(grepl("genetic", defined_populations_simplified), 'genetic method', 'non genetic'))
# get sample size by desired category
sample_size <- indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
filter(!is.na(species_range)) %>%
group_by(species_range) %>% summarize(num=n())
# plot
p2<-indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
filter(!is.na(species_range)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(species_range, " (n= ", num, ")")) %>%
# plot
ggplot(aes(x=myaxis, y=indicator2_mean)) +
geom_violin(width=1, linewidth = 0, fill="grey70") +
xlab("") + ylab("Proportion of populations maintained") +
coord_flip() +
new_scale_color() + # to color points without confuisng ggplot
geom_jitter(size=1.2, width = 0.1, aes(color = genetic_to_define_pops)) +
scale_color_manual(values=c("red", "black")) +
labs(color=NULL) + # hide legend title
theme_light() +
theme(panel.border = element_blank(), legend.position="bottom", text= element_text(size=20))
## Joining, by = "species_range"
p2
Two panel figure:
plot_grid(p1 + theme(legend.position = "non2"), # legend can be shown only below both plots
p2,
ncol = 1,
align = "v")

Indicatros by threat status (IUCN Red List)
All the following plots and analyses consider the average of
multiassessed species (variable _mean), so that they are
shown only once.
(a) Ne > 500 indicator and red list status
Plot indicator 1 by global IUCN in the entire dataset:
## Global IUCN
## prepare data
# add sampling size
sample_size <- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
filter(!is.na(global_IUCN)) %>%
group_by(global_IUCN) %>% summarize(num=n())
# new df
df<- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
filter(!is.na(global_IUCN)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(global_IUCN, " (n= ", num, ")"))
## Joining, by = "global_IUCN"
# change order of levels so that they are in the desired order
df$myaxis<-factor(df$myaxis,
#grep is used below to get the sample size, which may change depending on the data
levels=c(grep("cr", unique(df$myaxis), value = TRUE),
grep("en", unique(df$myaxis), value = TRUE),
grep("vu", unique(df$myaxis), value = TRUE),
grep("nt", unique(df$myaxis), value = TRUE),
grep("lc", unique(df$myaxis), value = TRUE),
grep("dd", unique(df$myaxis), value = TRUE),
grep("not_assessed", unique(df$myaxis), value = TRUE),
grep("unknown", unique(df$myaxis), value = TRUE)))
df$global_IUCN<-factor(df$global_IUCN, levels=c("cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown"))
# plot
p1<-df %>%
ggplot(aes(x=myaxis, y=indicator1_mean , fill=global_IUCN)) +
geom_violin(width=1, linewidth = 0) +
geom_jitter(size=.5, width = 0.1) +
xlab("") + ylab("Proportion of populations with Ne>500") +
coord_flip() +
scale_fill_manual(values= IUCNcolors, # iucn color codes
breaks=c(levels(df$global_IUCN))) +
scale_x_discrete(limits=rev) +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
plot.title = element_text(hjust = 0.5), # center title
text= element_text(size=15))
p1

Summary table:
x <- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
filter(!is.na(global_IUCN)) %>%
group_by(global_IUCN) %>%
summarize(n=n(),
mean=mean(indicator1_mean),
median=median(indicator1_mean),
per.0=sum(indicator1_mean==0) / n *100,
per.below.25=sum(indicator1_mean<0.25) / n *100,
per.below.90=sum(indicator1_mean<0.90) / n *100,
per.above.75=sum(indicator1_mean>0.75)/ n *100,
per1=sum(indicator1_mean==1) / n *100)
kable(x, digits=2)
| cr |
44 |
0.11 |
0.00 |
84.09 |
86.36 |
93.18 |
6.82 |
6.82 |
| dd |
10 |
0.44 |
0.21 |
40.00 |
50.00 |
60.00 |
40.00 |
40.00 |
| en |
47 |
0.25 |
0.00 |
65.96 |
70.21 |
80.85 |
19.15 |
19.15 |
| lc |
180 |
0.38 |
0.06 |
46.67 |
54.44 |
71.11 |
30.00 |
28.89 |
| not_assessed |
159 |
0.18 |
0.00 |
64.15 |
74.84 |
91.19 |
8.81 |
8.81 |
| nt |
51 |
0.24 |
0.00 |
54.90 |
72.55 |
84.31 |
15.69 |
15.69 |
| unknown |
3 |
0.67 |
1.00 |
33.33 |
33.33 |
33.33 |
66.67 |
66.67 |
| vu |
66 |
0.32 |
0.00 |
56.06 |
59.09 |
77.27 |
24.24 |
22.73 |
Indicator 1 by country and global IUCN
## change order of levels so that categories match with the order of colors
indicators_averaged_one$global_IUCN<-factor(indicators_averaged_one$global_IUCN, levels=c("cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown"))
# plot
indicators_averaged_one %>%
filter(!is.na(regional_redlist)) %>%
# plot
ggplot(aes(x=global_IUCN, y=indicator1_mean, fill=global_IUCN)) +
geom_violin(width=1, linewidth = 0) +
geom_jitter(size=.5, width = 0.1) +
xlab("") + ylab("Proportion of populations with Ne>500") +
coord_flip() +
scale_fill_manual(values= IUCNcolors, # iucn color codes
breaks=c(levels(indicators_averaged_one$global_IUCN))) +
scale_x_discrete(limits=rev) +
theme_light() +
ggtitle("global IUCN Redlist") +
theme(panel.border = element_blank(), legend.position="none",
plot.title = element_text(hjust = 0.5), # center title
text= element_text(size=13)) +
facet_wrap(~country_assessment, ncol = 3) +
theme(panel.spacing = unit(1.5, "lines"))
## Warning: Removed 344 rows containing non-finite values (`stat_ydensity()`).
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Warning: Removed 344 rows containing missing values (`geom_point()`).

Indicator1 by regional IUCN Redlist, excluding US, Australia and
Mexico becasue they don’t have a regional IUCN redlist.
## change order of levels so that categories match with the order of colors
indicators_averaged_one$regional_redlist<-factor(indicators_averaged_one$regional_redlist, levels=c("re","cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown"))
# plot
indicators_averaged_one %>%
# filter US and Mx
filter(country_assessment %!in% c("Mexico", "US", "Australia")) %>%
filter(!is.na(regional_redlist)) %>%
# plot
ggplot(aes(x=regional_redlist, y=indicator1_mean, fill=regional_redlist)) +
geom_violin(width=1, linewidth = 0) +
geom_jitter(size=.5, width = 0.1) +
xlab("") + ylab("Proportion of populations with Ne>500") +
coord_flip() +
scale_fill_manual(values= IUCNcolors_regional, # iucn color codes
breaks=c(levels(indicators_averaged_one$regional_redlist))) +
scale_x_discrete(limits=rev) +
theme_light() +
ggtitle("regional IUCN Redlist") +
theme(panel.border = element_blank(), legend.position="none",
plot.title = element_text(hjust = 0.5), # center title
text= element_text(size=15)) +
facet_wrap(~country_assessment, ncol = 3) +
theme(panel.spacing = unit(1.5, "lines"))
## Warning: Removed 170 rows containing non-finite values (`stat_ydensity()`).
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Warning: Removed 170 rows containing missing values (`geom_point()`).

(b) Proportion of Maintained Populations and red list status?
Plot indicator 2 by global IUCN in the entire dataset:
## Global IUCN
## prepare data
# add sampling size
sample_size <- indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
filter(!is.na(global_IUCN)) %>%
group_by(global_IUCN) %>% summarize(num=n())
# new df
df<- indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
filter(!is.na(global_IUCN)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(global_IUCN, " (n= ", num, ")"))
## Joining, by = "global_IUCN"
# change order of levels so that they are in the desired order
df$myaxis<-factor(df$myaxis,
#grep is used below to get the sample size, which may change depending on the data
levels=c(grep("cr", unique(df$myaxis), value = TRUE),
grep("en", unique(df$myaxis), value = TRUE),
grep("vu", unique(df$myaxis), value = TRUE),
grep("nt", unique(df$myaxis), value = TRUE),
grep("lc", unique(df$myaxis), value = TRUE),
grep("dd", unique(df$myaxis), value = TRUE),
grep("not_assessed", unique(df$myaxis), value = TRUE),
grep("unknown", unique(df$myaxis), value = TRUE)))
df$global_IUCN<-factor(df$global_IUCN, levels=c("cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown"))
# plot
p2<-df %>%
ggplot(aes(x=myaxis, y=indicator2 , fill=global_IUCN)) +
geom_violin(width=1, linewidth = 0) +
geom_jitter(size=.5, width = 0.1) +
xlab("") + ylab("Proportion of maintained populations") +
coord_flip() +
scale_fill_manual(values= IUCNcolors, # iucn color codes
breaks=c(levels(df$global_IUCN))) +
scale_x_discrete(limits=rev) +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
plot.title = element_text(hjust = 0.5), # center title
text= element_text(size=15))
p2
## Warning: Removed 2 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).

Summary table:
x <- indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
filter(!is.na(global_IUCN)) %>%
group_by(global_IUCN) %>%
summarize(n=n(),
mean=mean(indicator2_mean),
median=median(indicator2_mean),
per.0=sum(indicator2_mean==0) / n *100,
per.below.25=sum(indicator2_mean<0.25) / n *100,
per.below.90=sum(indicator2_mean<0.90) / n *100,
per.above.75=sum(indicator2_mean>0.75)/ n *100,
per1=sum(indicator2_mean==1) / n *100)
kable(x, digits=2)
| cr |
40 |
0.85 |
1.00 |
0.00 |
5.00 |
32.50 |
77.50 |
67.50 |
| en |
59 |
0.79 |
0.86 |
0.00 |
1.69 |
50.85 |
61.02 |
49.15 |
| vu |
74 |
0.81 |
1.00 |
1.35 |
2.70 |
43.24 |
64.86 |
51.35 |
| nt |
50 |
0.86 |
1.00 |
0.00 |
4.00 |
32.00 |
76.00 |
64.00 |
| lc |
154 |
0.85 |
1.00 |
0.65 |
3.25 |
33.12 |
73.38 |
62.34 |
| dd |
9 |
0.71 |
0.83 |
0.00 |
0.00 |
66.67 |
66.67 |
33.33 |
| not_assessed |
158 |
0.84 |
0.99 |
0.63 |
1.90 |
39.24 |
70.89 |
50.00 |
| unknown |
2 |
1.00 |
1.00 |
0.00 |
0.00 |
0.00 |
100.00 |
100.00 |
Indicator 2 by country and global IUCN
## change order of levels so that categories match with the order of colors
indicators_averaged_one$global_IUCN<-factor(indicators_averaged_one$global_IUCN, levels=c("cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown"))
# plot
indicators_averaged_one %>%
filter(!is.na(regional_redlist)) %>%
# plot
ggplot(aes(x=global_IUCN, y=indicator2_mean, fill=global_IUCN)) +
geom_violin(width=1, linewidth = 0) +
geom_jitter(size=.5, width = 0.1) +
xlab("") + ylab("Proportion of maintained populations") +
coord_flip() +
scale_fill_manual(values= IUCNcolors, # iucn color codes
breaks=c(levels(indicators_averaged_one$global_IUCN))) +
scale_x_discrete(limits=rev) +
theme_light() +
ggtitle("global IUCN Redlist") +
theme(panel.border = element_blank(), legend.position="none",
plot.title = element_text(hjust = 0.5), # center title
text= element_text(size=13)) +
facet_wrap(~country_assessment, ncol = 3) +
theme(panel.spacing = unit(1.5, "lines"))
## Warning: Removed 358 rows containing non-finite values (`stat_ydensity()`).
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Warning: Removed 358 rows containing missing values (`geom_point()`).

Main Figure: Single plot 2 pannels IUCN redlist and indicator range
values
plot_grid(p1,
p2,
ncol=1, align = "v", labels=c("a)", "b)"))
## Warning: Removed 2 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).

Comparing the Ne indicator against a mock IUCN assessment adding up
all populations
Generate mock Ne data for the entire species, by adding up the Ne of
each population within each species.
# Sum the Ne of each population within spp
x <- ind1_data %>% group_by(X_uuid, taxon, country_assessment, multiassessment) %>% # this groups by individual species, considering mutliassessed species
# sum Ne by individual species, keeping multiassesments separate
summarise(Ne_mock_species = sum(Ne_combined, na.rm = TRUE)) %>%
# average for multiassessed records in a single species
group_by(country_assessment, multiassessment, taxon) %>%
summarise(Ne_mock_species=mean(Ne_mock_species, na.rm=TRUE))
## `summarise()` has grouped output by 'X_uuid', 'taxon', 'country_assessment'.
## You can override using the `.groups` argument.
## `summarise()` has grouped output by 'country_assessment', 'multiassessment'.
## You can override using the `.groups` argument.
# Add mto indicator data
indicators_averaged_one<- left_join(indicators_averaged_one, x) %>%
# add a below above category
mutate(Ne_mock_category = ifelse(Ne_mock_species > 500, 'Above 500', 'Below 500'))
## Joining, by = c("country_assessment", "taxon", "multiassessment")
indicators_averaged_one %>% select(taxon, country_assessment, Ne_mock_species, Ne_mock_category) %>% head()
## Adding missing grouping variables: `multiassessment`
Plot the Ne indicator as in the violion plots above, but coloring the
points showing which species would be below or above Ne 500 if
considering Ne at the species level.
# add sampling size
sample_size <- indicators_averaged_one %>%
filter(!is.na(Ne_mock_species)) %>%
filter(!is.na(global_IUCN)) %>%
filter(Ne_mock_species<1000000) %>%
group_by(global_IUCN) %>% summarize(num=n())
# new df
df<- indicators_averaged_one %>%
filter(!is.na(Ne_mock_species)) %>%
filter(!is.na(global_IUCN)) %>%
filter(Ne_mock_species<1000000) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(global_IUCN, " (n= ", num, ")"))
## Joining, by = "global_IUCN"
# change order of levels so that they are in the desired order
df$myaxis<-factor(df$myaxis,
#grep is used below to get the sample size, which may change depending on the data
levels=c(grep("cr", unique(df$myaxis), value = TRUE),
grep("en", unique(df$myaxis), value = TRUE),
grep("vu", unique(df$myaxis), value = TRUE),
grep("nt", unique(df$myaxis), value = TRUE),
grep("lc", unique(df$myaxis), value = TRUE),
grep("dd", unique(df$myaxis), value = TRUE),
grep("not_assessed", unique(df$myaxis), value = TRUE),
grep("unknown", unique(df$myaxis), value = TRUE)))
df$global_IUCN<-factor(df$global_IUCN, levels=c("cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown"))
# Plot the Ne indicator as above, but with points colored by Ne_mock above or below 500
df %>%
ggplot(aes(x=myaxis, y=indicator1_mean , fill=global_IUCN)) +
geom_violin(width=1, linewidth = 0) +
xlab("") + ylab("Proportion of populations with Ne>500") +
coord_flip() +
scale_fill_manual(values= IUCNcolors, # iucn color codes
breaks=c(levels(df$global_IUCN)),
guide = "none") + # hide legend
scale_x_discrete(limits=rev) +
# add new scale color for points
new_scale_color() +
geom_jitter(size=1, width = 0.1, aes(color = Ne_mock_category)) +
scale_color_manual(values=c("black", "#F0A6CA")) +
labs(color= "Species (mock) Ne") +
# theme stuff
theme_light() +
theme(panel.border = element_blank(), legend.position = "bottom",
plot.title = element_text(hjust = 0.5), # center title
text= element_text(size=15))
## Warning: Removed 26 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 26 rows containing non-finite values (`new_stat_ydensity()`).
## Warning: Removed 26 rows containing missing values (`geom_point()`).

indicators_averaged_one %>%
filter(!is.na(Ne_mock_category)) %>%
ggplot(aes(x=global_IUCN, fill=Ne_mock_category))+
geom_bar(position = "dodge") +
scale_fill_manual(values=c("black", "#F0A6CA")) +
labs(color= "Species (mock) Ne") +
coord_flip() +
theme_light() +
scale_x_discrete(limits=rev) +
theme(text = element_text(size = 13), legend.position = "right", panel.border = element_blank())

Summnary table
x <- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
group_by(global_IUCN, Ne_mock_category) %>%
summarize(n=n(),
mean=mean(indicator1_mean),
median=median(indicator1_mean),
per.0=sum(indicator1_mean==0) / n *100,
per.below.25=sum(indicator1_mean<0.25) / n *100,
per.below.90=sum(indicator1_mean<0.90) / n *100,
per.above.75=sum(indicator1_mean>0.75)/ n *100,
per1=sum(indicator1_mean==1) / n *100)
## `summarise()` has grouped output by 'global_IUCN'. You can override using the
## `.groups` argument.
kable(x, digits = 2)
| cr |
Above 500 |
9 |
0.56 |
0.67 |
22.22 |
33.33 |
66.67 |
33.33 |
33.33 |
| cr |
Below 500 |
35 |
0.00 |
0.00 |
100.00 |
100.00 |
100.00 |
0.00 |
0.00 |
| en |
Above 500 |
19 |
0.63 |
0.60 |
15.79 |
26.32 |
52.63 |
47.37 |
47.37 |
| en |
Below 500 |
28 |
0.00 |
0.00 |
100.00 |
100.00 |
100.00 |
0.00 |
0.00 |
| vu |
Above 500 |
30 |
0.71 |
0.92 |
3.33 |
10.00 |
50.00 |
53.33 |
50.00 |
| vu |
Below 500 |
35 |
0.00 |
0.00 |
100.00 |
100.00 |
100.00 |
0.00 |
0.00 |
| vu |
NA |
1 |
0.00 |
0.00 |
100.00 |
100.00 |
100.00 |
0.00 |
0.00 |
| nt |
Above 500 |
27 |
0.45 |
0.33 |
14.81 |
48.15 |
70.37 |
29.63 |
29.63 |
| nt |
Below 500 |
24 |
0.00 |
0.00 |
100.00 |
100.00 |
100.00 |
0.00 |
0.00 |
| lc |
Above 500 |
111 |
0.61 |
0.67 |
13.51 |
26.13 |
53.15 |
48.65 |
46.85 |
| lc |
Below 500 |
69 |
0.00 |
0.00 |
100.00 |
100.00 |
100.00 |
0.00 |
0.00 |
| dd |
Above 500 |
7 |
0.63 |
1.00 |
14.29 |
28.57 |
42.86 |
57.14 |
57.14 |
| dd |
Below 500 |
3 |
0.00 |
0.00 |
100.00 |
100.00 |
100.00 |
0.00 |
0.00 |
| not_assessed |
Above 500 |
77 |
0.38 |
0.25 |
25.97 |
48.05 |
81.82 |
18.18 |
18.18 |
| not_assessed |
Below 500 |
82 |
0.00 |
0.00 |
100.00 |
100.00 |
100.00 |
0.00 |
0.00 |
| unknown |
Above 500 |
2 |
1.00 |
1.00 |
0.00 |
0.00 |
0.00 |
100.00 |
100.00 |
| unknown |
Below 500 |
1 |
0.00 |
0.00 |
100.00 |
100.00 |
100.00 |
0.00 |
0.00 |
| NA |
Below 500 |
1 |
0.00 |
0.00 |
100.00 |
100.00 |
100.00 |
0.00 |
0.00 |
Indicator values by taxonomic group
All the following plots and analyses consider the average of
multiassessed species (variable _mean), so that they are shown only
once.
We also grouped taxa with small n (<5) into “others”, according to
the following table:
table(indicators_averaged_one$taxonomic_group)
##
## amphibian angiosperm bird bryophyte fish
## 56 235 157 5 62
## fungus gymnosperm invertebrate mammal other
## 3 19 135 135 18
## pteridophytes reptile
## 14 70
They are grouped along with “other” in a new category “others” in the
new variable taxonomic_group_simplified:
indicators_averaged_one <- indicators_averaged_one %>%
mutate(taxonomic_group_simplified = case_when(
# if the taxon group is in the list of groups with small n change to "others"
taxonomic_group %!in% c("bryophyte", "fungus", "other") ~ taxonomic_group,
TRUE ~ "others"))
# check:
table(indicators_averaged_one$taxonomic_group_simplified)
##
## amphibian angiosperm bird fish gymnosperm
## 56 235 157 62 19
## invertebrate mammal others pteridophytes reptile
## 135 135 26 14 70
Violin plots and summary tables for each indicator by taxonomic
group
Indicator Ne > 500
## prepare data
# add sampling size
sample_size <- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
group_by(taxonomic_group_simplified) %>% summarize(num=n())
# new df
df<- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(taxonomic_group_simplified, " (n= ", num, ")"))
## Joining, by = "taxonomic_group_simplified"
# change order of levels so that they are in the desired order
df$myaxis<-factor(df$myaxis,
#grep is used below to get the sample size, which may change depending on the data
levels=c(grep("amphibian", unique(df$myaxis), value = TRUE),
grep("bird" , unique(df$myaxis), value = TRUE),
grep("fish" , unique(df$myaxis), value = TRUE),
grep("invertebrate", unique(df$myaxis), value = TRUE),
grep("mammal", unique(df$myaxis), value = TRUE),
grep("reptile", unique(df$myaxis), value = TRUE),
grep("angiosperm", unique(df$myaxis), value = TRUE),
grep("gymnosperm", unique(df$myaxis), value = TRUE),
grep("pteridophytes", unique(df$myaxis), value = TRUE),
grep("others" , unique(df$myaxis), value = TRUE)))
df$taxonomic_group_simplified<-factor(df$taxonomic_group_simplified,
levels=c("amphibian", "bird" , "fish" , "invertebrate", "mammal", "reptile",
"angiosperm", "gymnosperm", "pteridophytes",
"others"))
# plot
p1<-df %>%
ggplot(aes(x=myaxis, y=indicator1_mean, fill=taxonomic_group_simplified, color=taxonomic_group_simplified)) +
geom_violin(width=1.5, linewidth = 0.2) +
geom_jitter(size=.7, width = 0.1, color="black") +
xlab("") + ylab("Proportion of populations with Ne>500") +
coord_flip() +
scale_x_discrete(limits=rev) +
scale_fill_manual(values= c(rep(grouped_taxon_colors[1], 6), # for animals
rep(grouped_taxon_colors[2], 3), # for platns
rep(grouped_taxon_colors[3], 1)), # for fungi and others
breaks=c(levels(df$taxonomic_group_simplified))) +
scale_color_manual(values= c(rep(grouped_taxon_colors[1], 6), # for animals
rep(grouped_taxon_colors[2], 3), # for platns
rep(grouped_taxon_colors[3], 1)), # for fungi and others
breaks=c(levels(df$taxonomic_group_simplified))) +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
text= element_text(size=15))
p1
## Warning: `position_dodge()` requires non-overlapping x intervals

Table with sampling size, mean indicator value and proporiton of taxa
where the value is below 0.25, 0.50 and 0.75:
#summary table by taxonomic group
x <- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
filter(!is.na(taxonomic_group_simplified)) %>%
group_by(taxonomic_group_simplified) %>%
summarize(n=n(),
mean=mean(indicator1_mean),
median=median(indicator1_mean),
n.below.75=sum(indicator1_mean<0.75),
n.below.50=sum(indicator1_mean<0.50),
n.below.25=sum(indicator1_mean<0.25),
per.below.25=n.below.25/n*100,
per.below.50=n.below.50/n*100)
# Calculate total counts and means
total_counts <- summarise(x,
taxonomic_group_simplified = "ALL",
n = sum(n),
mean= mean(mean),
median=median(median),
n.below.75 = sum(n.below.75),
n.below.50 = sum(n.below.50),
n.below.25 = sum(n.below.25),
per.below.25 = n.below.25 / n * 100,
per.below.50 = n.below.50 / n * 100)
# Bind the total row to the summary_table
summary_table <- bind_rows(x, total_counts)
# keep taxonomic groups as level in desired order:
summary_table$taxonomic_group_simplified<-factor(summary_table$taxonomic_group_simplified,
levels = c("amphibian", "bird" , "fish" , "invertebrate", "mammal",
"angiosperm", "gymnosperm", "reptile", "pteridophytes",
"others", "ALL"))
summary_table<- summary_table %>% arrange(taxonomic_group_simplified)
# show nice table
kable(summary_table, digits=2)
| amphibian |
24 |
0.17 |
0.00 |
23 |
19 |
17 |
70.83 |
79.17 |
| bird |
89 |
0.33 |
0.00 |
64 |
58 |
56 |
62.92 |
65.17 |
| fish |
34 |
0.39 |
0.20 |
25 |
20 |
18 |
52.94 |
58.82 |
| invertebrate |
65 |
0.29 |
0.00 |
50 |
45 |
44 |
67.69 |
69.23 |
| mammal |
96 |
0.42 |
0.08 |
62 |
54 |
50 |
52.08 |
56.25 |
| angiosperm |
186 |
0.18 |
0.00 |
168 |
152 |
138 |
74.19 |
81.72 |
| gymnosperm |
15 |
0.16 |
0.00 |
13 |
13 |
12 |
80.00 |
86.67 |
| reptile |
31 |
0.30 |
0.00 |
23 |
22 |
20 |
64.52 |
70.97 |
| pteridophytes |
11 |
0.18 |
0.00 |
11 |
8 |
8 |
72.73 |
72.73 |
| others |
10 |
0.15 |
0.00 |
9 |
8 |
8 |
80.00 |
80.00 |
| ALL |
561 |
0.26 |
0.00 |
448 |
399 |
371 |
66.13 |
71.12 |
Indicator Proportion of mantained populations:
## prepare data
# add sampling size
sample_size <- indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
group_by(taxonomic_group_simplified) %>% summarize(num=n())
# new df
df<- indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
# add sampling size
left_join(sample_size) %>%
mutate(myaxis = paste0(taxonomic_group_simplified, " (n= ", num, ")"))
## Joining, by = "taxonomic_group_simplified"
# change order of levels so that they are in the desired order
df$myaxis<-factor(df$myaxis,
#grep is used below to get the sample size, which may change depending on the data
levels=c(grep("amphibian", unique(df$myaxis), value = TRUE),
grep("bird" , unique(df$myaxis), value = TRUE),
grep("fish" , unique(df$myaxis), value = TRUE),
grep("invertebrate", unique(df$myaxis), value = TRUE),
grep("mammal", unique(df$myaxis), value = TRUE),
grep("reptile", unique(df$myaxis), value = TRUE),
grep("angiosperm", unique(df$myaxis), value = TRUE),
grep("gymnosperm", unique(df$myaxis), value = TRUE),
grep("pteridophytes", unique(df$myaxis), value = TRUE),
grep("others" , unique(df$myaxis), value = TRUE)))
df$taxonomic_group_simplified<-factor(df$taxonomic_group_simplified,
levels=c("amphibian", "bird" , "fish" , "invertebrate", "mammal", "reptile",
"angiosperm", "gymnosperm", "pteridophytes",
"others"))
# plot
p2<-df %>%
ggplot(aes(x=myaxis, y=indicator2_mean, fill=taxonomic_group_simplified, color=taxonomic_group_simplified)) +
geom_violin(width=1, linewidth = 0.2) +
geom_jitter(size=.7, width = 0.1, color="black") +
xlab("") + ylab("Proportion of maintained populations") +
coord_flip() +
scale_x_discrete(limits=rev) +
scale_fill_manual(values= c(rep(grouped_taxon_colors[1], 6), # for animals
rep(grouped_taxon_colors[2], 3), # for platns
rep(grouped_taxon_colors[3], 1)), # for fungi and others
breaks=c(levels(df$taxonomic_group_simplified))) +
scale_color_manual(values= c(rep(grouped_taxon_colors[1], 6), # for animals
rep(grouped_taxon_colors[2], 3), # for platns
rep(grouped_taxon_colors[3], 1)), # for fungi and others
breaks=c(levels(df$taxonomic_group_simplified))) +
theme_light() +
theme(panel.border = element_blank(), legend.position="none",
text= element_text(size=15))
p2

Table with sampling size, mean indicator value and proporiton of taxa
where the value is below 0.25, 0.50 and 0.75:
# summary table for taxonomic group:
x <- indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
filter(!is.na(taxonomic_group_simplified)) %>%
group_by(taxonomic_group_simplified) %>%
summarize(n=n(),
mean=mean(indicator2_mean),
median=median(indicator2_mean),
n.below.75=sum(indicator2_mean<0.75),
n.below.50=sum(indicator2_mean<0.50),
n.below.25=sum(indicator2_mean<0.25),
per.below.25=n.below.25/n*100,
per.below.50=n.below.50/n*100)
# Calculate total counts and means
total_counts <- summarise(x,
taxonomic_group_simplified = "ALL",
n = sum(n),
mean = mean(mean),
median = median(median),
n.below.75 = sum(n.below.75),
n.below.50 = sum(n.below.50),
n.below.25 = sum(n.below.25),
per.below.25 = n.below.25 / n * 100,
per.below.50 = n.below.50 / n * 100)
# Bind the total row to the summary_table
summary_table <- bind_rows(x, total_counts)
# keep taxonomic groups as level in desired order:
summary_table$taxonomic_group_simplified<-factor(summary_table$taxonomic_group_simplified,
levels = c("amphibian", "bird" , "fish" , "invertebrate", "mammal",
"angiosperm", "gymnosperm", "reptile", "pteridophytes",
"others", "ALL"))
summary_table<- summary_table %>% arrange(taxonomic_group_simplified)
# show nice table
kable(summary_table, digits=2)
| amphibian |
43 |
0.85 |
1.00 |
9 |
4 |
1 |
2.33 |
9.30 |
| bird |
84 |
0.83 |
1.00 |
24 |
9 |
2 |
2.38 |
10.71 |
| fish |
42 |
0.79 |
0.88 |
17 |
3 |
1 |
2.38 |
7.14 |
| invertebrate |
77 |
0.67 |
0.67 |
40 |
21 |
7 |
9.09 |
27.27 |
| mammal |
80 |
0.94 |
1.00 |
8 |
3 |
0 |
0.00 |
3.75 |
| angiosperm |
145 |
0.84 |
1.00 |
36 |
13 |
4 |
2.76 |
8.97 |
| gymnosperm |
9 |
0.97 |
1.00 |
0 |
0 |
0 |
0.00 |
0.00 |
| reptile |
38 |
0.91 |
1.00 |
5 |
2 |
0 |
0.00 |
5.26 |
| pteridophytes |
8 |
0.82 |
1.00 |
3 |
1 |
0 |
0.00 |
12.50 |
| others |
20 |
0.83 |
0.89 |
6 |
1 |
0 |
0.00 |
5.00 |
| ALL |
546 |
0.85 |
1.00 |
148 |
57 |
15 |
2.75 |
10.44 |
Values of indicator 1 and indicator 2 for multiassessed species
#subset only with taxa assessed multiple times:
only_multi<-indicators_full %>%
filter(multiassessment=="multiassessment")
First, check how indicator 1 changes across the multiassessments.
p1<-only_multi %>%
# Keep rows with different values in indicator1 within each taxon group
group_by(taxon) %>%
filter(n_distinct(indicator1) > 1) %>%
# plot
ggplot(aes(x=taxon, y=indicator1)) +
geom_line(colour="darkgrey") +
geom_point(aes(color=country_assessment)) +
xlab("") + ylab("Proportion of populations with Ne>500") +
labs(color="country") +
ylim(0, 1)+
coord_flip() +
theme_light() +
theme(panel.border = element_blank(), legend.position="right", text= element_text(size=13))
p1
## Warning: Removed 7 rows containing missing values (`geom_line()`).
## Warning: Removed 8 rows containing missing values (`geom_point()`).

Now check how Proportion of maintained populations (indicator 2)
changes across the multiassessments.
p2<-only_multi %>%
# Keep rows with different values in indicator1 within each taxon group
group_by(taxon) %>%
filter(n_distinct(indicator2) > 1) %>%
ggplot(aes(x=taxon, y=indicator2)) +
geom_line(colour="darkgrey") +
geom_point(aes(color=country_assessment)) +
scale_color_manual(values= scales::hue_pal()(4)[2:4]) + # last 3 colors to make them the same than the other plot
xlab("") + ylab("Proportion of populations maintained") +
labs(color="country") +
coord_flip() +
theme_light() +
theme(panel.border = element_blank(), legend.position="right", text= element_text(size=13))
p2
## Warning: Removed 6 rows containing missing values (`geom_line()`).
## Warning: Removed 6 rows containing missing values (`geom_point()`).

Plot together:
plot_grid(p2, p1,
rel_heights = c(1.3, 0.9),
ncol=1, labels=c("a)", "b)"))
## Warning: Removed 6 rows containing missing values (`geom_line()`).
## Warning: Removed 6 rows containing missing values (`geom_point()`).
## Warning: Removed 7 rows containing missing values (`geom_line()`).
## Warning: Removed 8 rows containing missing values (`geom_point()`).

Indicator 3 (number of species with genetic diversity
monitoring)
Indicator 3 refers to the number (count) of taxa by country in which
genetic monitoring is occurring. This is stored in the variable
temp_gen_monitoring as a “yes/no” answer for each taxon.
indicator3
Plot by global IUCN redlist status
# desired order of levels
indicators_full$global_IUCN<-factor(as.factor(indicators_full$global_IUCN), levels=c("cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown"))
## plot
indicators_full %>%
# keep only one record if the taxon was assessed more than once within the country
select(country_assessment, taxon, temp_gen_monitoring, global_IUCN) %>%
filter(!duplicated(.)) %>%
# count "yes" in tem_gen_monitoring by country
filter(temp_gen_monitoring=="yes") %>%
ggplot(aes(x=country_assessment, fill=global_IUCN)) +
geom_bar() +
xlab("") + ylab("Number of taxa with temporal genetic diversity monitoring") +
scale_fill_manual(values= IUCNcolors, # iucn color codes
breaks=levels(as.factor(indicators_full$global_IUCN))) +
theme_light()

Relatively few taxa have genetic monitoring, but many have some sort
of genetic study. Let’s check that with a Sankey Plot:
# first subset the ind3_data keeping only taxa assessed a single time, plust the first record of those assessed multiple times.
ind3_data_firstmulti<-ind3_data[!duplicated(cbind(ind3_data$taxon, ind3_data$country_assessment)), ]
# transform data to how ggsankey wants it
df <- ind3_data_firstmulti %>%
make_long(country_assessment, temp_gen_monitoring, gen_studies)
# plot
ggplot(df, aes(x = x,
next_x = next_x,
node = node,
next_node = next_node,
fill = factor(node),
label = node)) +
geom_sankey(flow.alpha = 0.5,
show.legend = FALSE) +
geom_sankey_label(size = 2.5, color = "black", fill = "white") +
theme_sankey(base_size = 10) +
# manually set flow fill according to desired color
# countries
scale_fill_manual(values=c(scales::hue_pal()(length(unique(ind3_data_firstmulti$country_assessment))),
# traffic light for monitoring
c("darkolivegreen", "brown3", "darkgrey"),
# nice soft colors for gen_studies
c("grey50", "grey35", "grey50", "brown3")),
breaks=c(unique(ind3_data_firstmulti$country_assessment),
unique(ind3_data_firstmulti$temp_gen_monitoring),
unique(ind3_data_firstmulti$gen_studies))) +
xlab("")
## Warning: Removed 2 rows containing missing values (`geom_label()`).

table(ind3_data_firstmulti$gen_studies)
##
## no phylo phylo_pop pop
## 386 185 239 94
Count data:
ind3_data %>%
# keep only one record if the taxon was assessed more than once within the country
select(country_assessment, taxon, gen_studies, temp_gen_monitoring) %>%
filter(!duplicated(.)) %>%
group_by(country_assessment, temp_gen_monitoring, gen_studies) %>%
summarise(n_studies=n())
## `summarise()` has grouped output by 'country_assessment',
## 'temp_gen_monitoring'. You can override using the `.groups` argument.
How many genetic studies ara available by country for species without
temporal genetic diversity monitoring?
## plot
indicators_full %>%
# keep only one record if the taxon was assessed more than once within the country
select(country_assessment, taxon, temp_gen_monitoring, gen_studies) %>%
filter(!duplicated(.)) %>%
# keep only taxa without gen div monitoring
filter(temp_gen_monitoring=="no")%>%
ggplot(aes(x=country_assessment, fill=gen_studies)) +
geom_bar() +
scale_fill_manual(values=c("grey80", scales::hue_pal()(3)))+
xlab("") +
theme_light()

Summary table of mean indicator values and n
The tables below show the indicator values and sampling size
averaging them by country, taxonomic group, distribution type or IUCN
global red list status. For this summary the mean of the multiassessed
species was considering and counted as a single entry for the sampling
size.
Codes for indicator names:
- PM.ind: Proportion of Mantained populations
indicator (indicator 2)
- Ne.ind: Proportion of populations where Ne>500
indicator (indicator 1)
- Mon.ind: Number of species where genetic diversity
monitoring is taking place (indicator 3)
Codes for summary stats:
- n: sampling size (number of taxa assessed) without
missing data
- mean: mean value for the indicator value
- sd: standar deviation for the indicator value
Summary stats by country:
x<-indicators_averaged_one %>%
group_by(country_assessment) %>%
summarise(n.PM.ind=sum(!is.na(indicator2)),
mean.PM.ind=mean(indicator2, na.rm=TRUE),
sd.PM.ind=sd(indicator2, na.rm=TRUE),
n.Ne.ind=sum(!is.na(indicator1)),
mean.Ne.ind=mean(indicator1, na.rm=TRUE),
sd.Ne.ind=sd(indicator1, na.rm=TRUE),
Mon.ind=sum(temp_gen_monitoring=="yes"))
# nice table
kable(x, digits=3)
| Australia |
28 |
0.903 |
0.178 |
47 |
0.170 |
0.299 |
10 |
| Belgium |
27 |
0.453 |
0.221 |
101 |
0.246 |
0.381 |
10 |
| Colombia |
50 |
0.831 |
0.230 |
41 |
0.341 |
0.480 |
NA |
| France |
34 |
0.854 |
0.278 |
55 |
0.416 |
0.471 |
7 |
| Japan |
50 |
0.925 |
0.152 |
50 |
0.077 |
0.180 |
0 |
| Mexico |
28 |
0.936 |
0.135 |
47 |
0.217 |
0.354 |
7 |
| S. Africa |
90 |
0.948 |
0.155 |
61 |
0.422 |
0.475 |
5 |
| Sweden |
120 |
0.777 |
0.271 |
81 |
0.192 |
0.334 |
20 |
| US |
117 |
0.794 |
0.244 |
75 |
0.370 |
0.415 |
6 |
Taxonomic groups
Summary stats by taxonomic group:
x<-indicators_averaged_one %>%
group_by(taxonomic_group) %>%
summarise(n.PM.ind=sum(!is.na(indicator2)),
mean.PM.ind=mean(indicator2, na.rm=TRUE),
sd.PM.ind=sd(indicator2, na.rm=TRUE),
n.Ne.ind=sum(!is.na(indicator1)),
mean.Ne.ind=mean(indicator1, na.rm=TRUE),
sd.Ne.ind=sd(indicator1, na.rm=TRUE),
Mon.ind=sum(temp_gen_monitoring=="yes"))
# nice table
kable(x, digits=3)
| amphibian |
43 |
0.833 |
0.244 |
24 |
0.159 |
0.258 |
9 |
| angiosperm |
144 |
0.841 |
0.239 |
186 |
0.179 |
0.313 |
6 |
| bird |
83 |
0.834 |
0.252 |
89 |
0.328 |
0.448 |
NA |
| bryophyte |
4 |
0.688 |
0.252 |
2 |
0.250 |
0.354 |
0 |
| fish |
42 |
0.779 |
0.244 |
34 |
0.414 |
0.448 |
11 |
| fungus |
3 |
0.903 |
0.167 |
2 |
0.500 |
0.707 |
0 |
| gymnosperm |
9 |
0.975 |
0.050 |
15 |
0.161 |
0.353 |
0 |
| invertebrate |
77 |
0.671 |
0.309 |
64 |
0.278 |
0.406 |
4 |
| mammal |
80 |
0.937 |
0.161 |
95 |
0.419 |
0.461 |
22 |
| other |
13 |
0.856 |
0.142 |
6 |
0.000 |
0.000 |
3 |
| pteridophytes |
8 |
0.824 |
0.251 |
11 |
0.179 |
0.284 |
0 |
| reptile |
38 |
0.909 |
0.171 |
30 |
0.298 |
0.441 |
1 |
Detailed table:
x<-indicators_averaged_one %>%
group_by(country_assessment, taxonomic_group) %>%
summarise(n.PM.ind=sum(!is.na(indicator2)),
mean.PM.ind=mean(indicator2, na.rm=TRUE),
sd.PM.ind=sd(indicator2, na.rm=TRUE),
n.Ne.ind=sum(!is.na(indicator1)),
mean.Ne.ind=mean(indicator1, na.rm=TRUE),
sd.Ne.ind=sd(indicator1, na.rm=TRUE),
Mon.ind=sum(temp_gen_monitoring=="yes"))
## `summarise()` has grouped output by 'country_assessment'. You can override
## using the `.groups` argument.
# nice table
kable(x, digits=3)
| Australia |
amphibian |
0 |
NaN |
NA |
1 |
0.000 |
NA |
0 |
| Australia |
angiosperm |
2 |
0.700 |
0.424 |
15 |
0.115 |
0.276 |
1 |
| Australia |
bird |
9 |
1.000 |
0.000 |
9 |
0.167 |
0.264 |
2 |
| Australia |
bryophyte |
0 |
NaN |
NA |
1 |
0.500 |
NA |
0 |
| Australia |
fish |
1 |
1.000 |
NA |
2 |
0.500 |
0.707 |
1 |
| Australia |
gymnosperm |
0 |
NaN |
NA |
2 |
0.000 |
0.000 |
0 |
| Australia |
invertebrate |
1 |
0.500 |
NA |
0 |
NaN |
NA |
0 |
| Australia |
mammal |
3 |
0.750 |
0.250 |
10 |
0.303 |
0.359 |
3 |
| Australia |
other |
5 |
0.887 |
0.141 |
1 |
0.000 |
NA |
3 |
| Australia |
pteridophytes |
0 |
NaN |
NA |
1 |
0.000 |
NA |
0 |
| Australia |
reptile |
7 |
0.958 |
0.078 |
5 |
0.050 |
0.112 |
0 |
| Belgium |
amphibian |
3 |
0.310 |
0.170 |
9 |
0.189 |
0.329 |
1 |
| Belgium |
angiosperm |
5 |
0.446 |
0.279 |
26 |
0.093 |
0.219 |
0 |
| Belgium |
bryophyte |
1 |
0.444 |
NA |
1 |
0.000 |
NA |
0 |
| Belgium |
fish |
5 |
0.570 |
0.153 |
9 |
0.206 |
0.352 |
2 |
| Belgium |
gymnosperm |
0 |
NaN |
NA |
1 |
0.050 |
NA |
0 |
| Belgium |
invertebrate |
10 |
0.444 |
0.259 |
30 |
0.323 |
0.416 |
3 |
| Belgium |
mammal |
3 |
0.444 |
0.192 |
19 |
0.447 |
0.497 |
4 |
| Belgium |
pteridophytes |
0 |
NaN |
NA |
2 |
0.250 |
0.354 |
0 |
| Belgium |
reptile |
0 |
NaN |
NA |
4 |
0.030 |
0.026 |
0 |
| Colombia |
amphibian |
2 |
0.625 |
0.177 |
0 |
NaN |
NA |
0 |
| Colombia |
angiosperm |
6 |
1.000 |
0.000 |
6 |
0.000 |
0.000 |
0 |
| Colombia |
bird |
35 |
0.795 |
0.242 |
29 |
0.448 |
0.506 |
NA |
| Colombia |
fish |
2 |
1.000 |
0.000 |
2 |
0.500 |
0.707 |
0 |
| Colombia |
mammal |
1 |
0.500 |
NA |
1 |
0.000 |
NA |
0 |
| Colombia |
other |
1 |
1.000 |
NA |
1 |
0.000 |
NA |
0 |
| Colombia |
reptile |
3 |
1.000 |
0.000 |
2 |
0.000 |
0.000 |
0 |
| France |
amphibian |
1 |
1.000 |
NA |
1 |
0.000 |
NA |
1 |
| France |
angiosperm |
3 |
0.667 |
0.577 |
6 |
0.583 |
0.492 |
0 |
| France |
bird |
11 |
0.852 |
0.259 |
20 |
0.342 |
0.460 |
1 |
| France |
fish |
1 |
0.167 |
NA |
6 |
0.589 |
0.463 |
2 |
| France |
fungus |
1 |
1.000 |
NA |
1 |
1.000 |
NA |
0 |
| France |
gymnosperm |
1 |
1.000 |
NA |
2 |
1.000 |
0.000 |
0 |
| France |
invertebrate |
3 |
0.700 |
0.265 |
7 |
0.405 |
0.508 |
0 |
| France |
mammal |
11 |
0.955 |
0.151 |
10 |
0.217 |
0.416 |
3 |
| France |
other |
1 |
0.900 |
NA |
0 |
NaN |
NA |
0 |
| France |
reptile |
1 |
1.000 |
NA |
2 |
0.500 |
0.707 |
0 |
| Japan |
angiosperm |
39 |
0.931 |
0.130 |
39 |
0.061 |
0.148 |
0 |
| Japan |
gymnosperm |
4 |
1.000 |
0.000 |
4 |
0.000 |
0.000 |
0 |
| Japan |
pteridophytes |
7 |
0.847 |
0.262 |
7 |
0.210 |
0.316 |
0 |
| Mexico |
amphibian |
0 |
NaN |
NA |
2 |
0.000 |
0.000 |
0 |
| Mexico |
angiosperm |
20 |
0.959 |
0.120 |
29 |
0.236 |
0.339 |
5 |
| Mexico |
bird |
1 |
0.667 |
NA |
2 |
0.500 |
0.707 |
1 |
| Mexico |
fish |
0 |
NaN |
NA |
0 |
NaN |
NA |
0 |
| Mexico |
gymnosperm |
2 |
0.886 |
0.005 |
6 |
0.061 |
0.148 |
0 |
| Mexico |
invertebrate |
1 |
1.000 |
NA |
0 |
NaN |
NA |
0 |
| Mexico |
mammal |
3 |
0.867 |
0.231 |
3 |
0.000 |
0.000 |
1 |
| Mexico |
pteridophytes |
0 |
NaN |
NA |
1 |
0.000 |
NA |
0 |
| Mexico |
reptile |
1 |
1.000 |
NA |
4 |
0.500 |
0.577 |
0 |
| S. Africa |
amphibian |
18 |
0.918 |
0.173 |
4 |
0.125 |
0.250 |
2 |
| S. Africa |
angiosperm |
12 |
0.833 |
0.277 |
10 |
0.060 |
0.190 |
0 |
| S. Africa |
bird |
11 |
1.000 |
0.000 |
11 |
0.327 |
0.467 |
1 |
| S. Africa |
fish |
9 |
1.000 |
0.000 |
4 |
0.297 |
0.477 |
0 |
| S. Africa |
gymnosperm |
1 |
1.000 |
NA |
0 |
NaN |
NA |
0 |
| S. Africa |
invertebrate |
0 |
NaN |
NA |
0 |
NaN |
NA |
0 |
| S. Africa |
mammal |
32 |
0.992 |
0.044 |
31 |
0.608 |
0.480 |
2 |
| S. Africa |
reptile |
7 |
0.869 |
0.254 |
1 |
1.000 |
NA |
0 |
| Sweden |
amphibian |
13 |
0.891 |
0.183 |
7 |
0.232 |
0.233 |
5 |
| Sweden |
angiosperm |
22 |
0.622 |
0.259 |
18 |
0.159 |
0.258 |
0 |
| Sweden |
bird |
11 |
0.696 |
0.385 |
9 |
0.111 |
0.333 |
2 |
| Sweden |
bryophyte |
2 |
0.904 |
0.048 |
0 |
NaN |
NA |
0 |
| Sweden |
fish |
7 |
0.738 |
0.290 |
4 |
0.299 |
0.476 |
4 |
| Sweden |
fungus |
2 |
0.855 |
0.205 |
1 |
0.000 |
NA |
0 |
| Sweden |
invertebrate |
29 |
0.674 |
0.292 |
20 |
0.078 |
0.225 |
0 |
| Sweden |
mammal |
20 |
0.986 |
0.047 |
15 |
0.361 |
0.447 |
8 |
| Sweden |
other |
6 |
0.800 |
0.153 |
4 |
0.000 |
0.000 |
0 |
| Sweden |
pteridophytes |
1 |
0.667 |
NA |
0 |
NaN |
NA |
0 |
| Sweden |
reptile |
7 |
0.983 |
0.045 |
3 |
0.619 |
0.541 |
1 |
| US |
amphibian |
6 |
0.754 |
0.267 |
0 |
NaN |
NA |
0 |
| US |
angiosperm |
35 |
0.867 |
0.181 |
37 |
0.348 |
0.402 |
0 |
| US |
bird |
5 |
0.741 |
0.205 |
9 |
0.254 |
0.375 |
2 |
| US |
bryophyte |
1 |
0.500 |
NA |
0 |
NaN |
NA |
0 |
| US |
fish |
17 |
0.737 |
0.198 |
7 |
0.615 |
0.448 |
2 |
| US |
gymnosperm |
1 |
1.000 |
NA |
0 |
NaN |
NA |
0 |
| US |
invertebrate |
33 |
0.730 |
0.324 |
7 |
0.533 |
0.493 |
1 |
| US |
mammal |
7 |
0.905 |
0.194 |
6 |
0.303 |
0.351 |
1 |
| US |
reptile |
12 |
0.823 |
0.202 |
9 |
0.302 |
0.460 |
0 |
IUCN
Summary stats:
x<-indicators_averaged_one %>%
group_by(global_IUCN) %>%
summarise(n.PM.ind=sum(!is.na(indicator2)),
mean.PM.ind=mean(indicator2, na.rm=TRUE),
sd.PM.ind=sd(indicator2, na.rm=TRUE),
n.Ne.ind=sum(!is.na(indicator1)),
mean.Ne.ind=mean(indicator1, na.rm=TRUE),
sd.Ne.ind=sd(indicator1, na.rm=TRUE),
Mon.ind=sum(temp_gen_monitoring=="yes"))
# nice table
kable(x, digits=3)
| cr |
40 |
0.843 |
0.263 |
44 |
0.114 |
0.289 |
8 |
| en |
59 |
0.786 |
0.254 |
47 |
0.265 |
0.418 |
9 |
| vu |
73 |
0.805 |
0.248 |
65 |
0.312 |
0.417 |
4 |
| nt |
50 |
0.849 |
0.249 |
50 |
0.237 |
0.375 |
7 |
| lc |
154 |
0.849 |
0.250 |
179 |
0.377 |
0.439 |
32 |
| dd |
9 |
0.707 |
0.313 |
10 |
0.442 |
0.490 |
2 |
| not_assessed |
157 |
0.838 |
0.233 |
159 |
0.184 |
0.326 |
3 |
| unknown |
2 |
1.000 |
0.000 |
3 |
0.667 |
0.577 |
0 |
| NA |
0 |
NaN |
NA |
1 |
0.000 |
NA |
NA |
Detailed table by IUCN category:
x<-indicators_averaged_one %>%
group_by(country_assessment, global_IUCN) %>%
summarise(n.PM.ind=sum(!is.na(indicator2)),
mean.PM.ind=mean(indicator2, na.rm=TRUE),
sd.PM.ind=sd(indicator2, na.rm=TRUE),
n.Ne.ind=sum(!is.na(indicator1)),
mean.Ne.ind=mean(indicator1, na.rm=TRUE),
sd.Ne.ind=sd(indicator1, na.rm=TRUE),
Mon.ind=sum(temp_gen_monitoring=="yes"))
## `summarise()` has grouped output by 'country_assessment'. You can override
## using the `.groups` argument.
# nice table
kable(x, digits=3)
| Australia |
cr |
5 |
0.860 |
0.219 |
10 |
0.000 |
0.000 |
3 |
| Australia |
en |
4 |
0.850 |
0.300 |
7 |
0.167 |
0.264 |
2 |
| Australia |
vu |
6 |
0.943 |
0.101 |
8 |
0.260 |
0.355 |
1 |
| Australia |
nt |
4 |
1.000 |
0.000 |
5 |
0.353 |
0.328 |
0 |
| Australia |
lc |
3 |
1.000 |
0.000 |
8 |
0.229 |
0.367 |
1 |
| Australia |
not_assessed |
6 |
0.822 |
0.202 |
9 |
0.128 |
0.329 |
3 |
| Australia |
unknown |
0 |
NaN |
NA |
0 |
NaN |
NA |
0 |
| Belgium |
cr |
1 |
0.333 |
NA |
2 |
0.500 |
0.707 |
0 |
| Belgium |
en |
1 |
0.455 |
NA |
1 |
0.000 |
NA |
0 |
| Belgium |
vu |
3 |
0.548 |
0.410 |
3 |
0.333 |
0.577 |
0 |
| Belgium |
nt |
2 |
0.310 |
0.034 |
13 |
0.030 |
0.058 |
3 |
| Belgium |
lc |
19 |
0.466 |
0.215 |
64 |
0.285 |
0.397 |
7 |
| Belgium |
dd |
1 |
0.333 |
NA |
3 |
0.364 |
0.553 |
0 |
| Belgium |
not_assessed |
0 |
NaN |
NA |
14 |
0.151 |
0.292 |
0 |
| Belgium |
unknown |
0 |
NaN |
NA |
1 |
1.000 |
NA |
0 |
| Colombia |
cr |
7 |
0.843 |
0.270 |
7 |
0.000 |
0.000 |
0 |
| Colombia |
en |
5 |
0.620 |
0.247 |
3 |
0.667 |
0.577 |
0 |
| Colombia |
vu |
20 |
0.812 |
0.225 |
15 |
0.133 |
0.352 |
0 |
| Colombia |
nt |
11 |
0.877 |
0.225 |
6 |
0.667 |
0.516 |
0 |
| Colombia |
lc |
7 |
0.952 |
0.126 |
9 |
0.667 |
0.500 |
0 |
| Colombia |
NA |
0 |
NaN |
NA |
1 |
0.000 |
NA |
NA |
| France |
cr |
2 |
0.583 |
0.589 |
5 |
0.040 |
0.089 |
1 |
| France |
en |
1 |
1.000 |
NA |
3 |
0.333 |
0.577 |
1 |
| France |
vu |
4 |
0.725 |
0.320 |
9 |
0.481 |
0.467 |
0 |
| France |
nt |
7 |
0.839 |
0.277 |
6 |
0.333 |
0.516 |
0 |
| France |
lc |
17 |
0.953 |
0.133 |
28 |
0.476 |
0.482 |
4 |
| France |
dd |
0 |
NaN |
NA |
2 |
1.000 |
0.000 |
1 |
| France |
not_assessed |
3 |
0.633 |
0.551 |
2 |
0.000 |
0.000 |
0 |
| Japan |
cr |
1 |
1.000 |
NA |
1 |
0.000 |
NA |
0 |
| Japan |
not_assessed |
49 |
0.923 |
0.153 |
49 |
0.079 |
0.181 |
0 |
| Mexico |
cr |
4 |
1.000 |
0.000 |
3 |
0.333 |
0.577 |
1 |
| Mexico |
en |
9 |
0.919 |
0.163 |
12 |
0.083 |
0.289 |
3 |
| Mexico |
vu |
5 |
0.900 |
0.224 |
5 |
0.000 |
0.000 |
1 |
| Mexico |
nt |
1 |
0.889 |
NA |
2 |
0.000 |
0.000 |
0 |
| Mexico |
lc |
5 |
0.936 |
0.092 |
12 |
0.497 |
0.367 |
2 |
| Mexico |
dd |
1 |
1.000 |
NA |
1 |
0.333 |
NA |
0 |
| Mexico |
not_assessed |
3 |
0.958 |
0.072 |
12 |
0.158 |
0.318 |
0 |
| S. Africa |
cr |
14 |
0.860 |
0.285 |
12 |
0.042 |
0.144 |
2 |
| S. Africa |
en |
16 |
0.895 |
0.182 |
9 |
0.467 |
0.469 |
1 |
| S. Africa |
vu |
14 |
0.982 |
0.067 |
12 |
0.500 |
0.522 |
1 |
| S. Africa |
nt |
8 |
0.969 |
0.088 |
8 |
0.253 |
0.356 |
0 |
| S. Africa |
lc |
34 |
1.000 |
0.000 |
18 |
0.667 |
0.485 |
1 |
| S. Africa |
dd |
1 |
1.000 |
NA |
0 |
NaN |
NA |
0 |
| S. Africa |
not_assessed |
2 |
0.750 |
0.354 |
1 |
0.000 |
NA |
0 |
| S. Africa |
unknown |
1 |
1.000 |
NA |
1 |
1.000 |
NA |
0 |
| Sweden |
en |
5 |
0.489 |
0.208 |
2 |
0.050 |
0.071 |
0 |
| Sweden |
vu |
7 |
0.685 |
0.247 |
7 |
0.297 |
0.363 |
1 |
| Sweden |
nt |
8 |
0.816 |
0.273 |
5 |
0.054 |
0.074 |
1 |
| Sweden |
lc |
63 |
0.836 |
0.259 |
39 |
0.258 |
0.380 |
17 |
| Sweden |
dd |
4 |
0.549 |
0.299 |
4 |
0.250 |
0.500 |
1 |
| Sweden |
not_assessed |
33 |
0.744 |
0.268 |
24 |
0.085 |
0.228 |
0 |
| US |
cr |
6 |
0.828 |
0.164 |
4 |
0.583 |
0.419 |
1 |
| US |
en |
18 |
0.743 |
0.268 |
10 |
0.300 |
0.483 |
2 |
| US |
vu |
14 |
0.664 |
0.271 |
6 |
0.464 |
0.323 |
0 |
| US |
nt |
9 |
0.796 |
0.289 |
5 |
0.284 |
0.435 |
3 |
| US |
lc |
6 |
0.791 |
0.208 |
1 |
0.000 |
NA |
0 |
| US |
dd |
2 |
0.917 |
0.118 |
0 |
NaN |
NA |
0 |
| US |
not_assessed |
61 |
0.829 |
0.234 |
48 |
0.379 |
0.418 |
0 |
| US |
unknown |
1 |
1.000 |
NA |
1 |
0.000 |
NA |
0 |
Distribution type
Summary stats:
x<-indicators_averaged_one %>%
group_by(species_range) %>%
summarise(n.PM.ind=sum(!is.na(indicator2)),
mean.PM.ind=mean(indicator2, na.rm=TRUE),
sd.PM.ind=sd(indicator2, na.rm=TRUE),
n.Ne.ind=sum(!is.na(indicator1)),
mean.Ne.ind=mean(indicator1, na.rm=TRUE),
sd.Ne.ind=sd(indicator1, na.rm=TRUE),
Mon.ind=sum(temp_gen_monitoring=="yes"))
# nice table
kable(x, digits=3)
| restricted |
332 |
0.810 |
0.262 |
310 |
0.188 |
0.345 |
24 |
| unknown |
18 |
0.832 |
0.250 |
19 |
0.316 |
0.478 |
1 |
| wide ranging |
194 |
0.867 |
0.217 |
228 |
0.388 |
0.434 |
40 |
| NA |
0 |
NaN |
NA |
1 |
0.000 |
NA |
NA |
Detailed table by IUCN category:
x<-indicators_averaged_one %>%
group_by(country_assessment, species_range) %>%
summarise(n.PM.ind=sum(!is.na(indicator2)),
mean.PM.ind=mean(indicator2, na.rm=TRUE),
sd.PM.ind=sd(indicator2, na.rm=TRUE),
n.Ne.ind=sum(!is.na(indicator1)),
mean.Ne.ind=mean(indicator1, na.rm=TRUE),
sd.Ne.ind=sd(indicator1, na.rm=TRUE),
Mon.ind=sum(temp_gen_monitoring=="yes"))
## `summarise()` has grouped output by 'country_assessment'. You can override
## using the `.groups` argument.
# nice table
kable(x, digits=3)
| Australia |
restricted |
14 |
0.865 |
0.224 |
27 |
0.114 |
0.253 |
4 |
| Australia |
unknown |
0 |
NaN |
NA |
1 |
0.000 |
NA |
0 |
| Australia |
wide ranging |
14 |
0.942 |
0.110 |
19 |
0.260 |
0.347 |
6 |
| Belgium |
restricted |
10 |
0.319 |
0.128 |
22 |
0.135 |
0.262 |
1 |
| Belgium |
unknown |
2 |
0.456 |
0.062 |
5 |
0.000 |
0.000 |
1 |
| Belgium |
wide ranging |
15 |
0.542 |
0.242 |
74 |
0.295 |
0.411 |
8 |
| Colombia |
restricted |
39 |
0.842 |
0.227 |
28 |
0.286 |
0.460 |
0 |
| Colombia |
unknown |
9 |
0.785 |
0.264 |
9 |
0.556 |
0.527 |
0 |
| Colombia |
wide ranging |
2 |
0.833 |
0.236 |
3 |
0.333 |
0.577 |
0 |
| Colombia |
NA |
0 |
NaN |
NA |
1 |
0.000 |
NA |
NA |
| France |
restricted |
14 |
0.741 |
0.336 |
28 |
0.227 |
0.388 |
2 |
| France |
wide ranging |
20 |
0.933 |
0.202 |
27 |
0.611 |
0.476 |
5 |
| Japan |
restricted |
35 |
0.939 |
0.141 |
35 |
0.080 |
0.180 |
0 |
| Japan |
unknown |
1 |
1.000 |
NA |
1 |
0.000 |
NA |
0 |
| Japan |
wide ranging |
14 |
0.884 |
0.179 |
14 |
0.076 |
0.192 |
0 |
| Mexico |
restricted |
19 |
0.933 |
0.138 |
31 |
0.094 |
0.267 |
4 |
| Mexico |
unknown |
2 |
1.000 |
0.000 |
0 |
NaN |
NA |
0 |
| Mexico |
wide ranging |
7 |
0.926 |
0.150 |
16 |
0.456 |
0.385 |
3 |
| S. Africa |
restricted |
41 |
0.905 |
0.206 |
29 |
0.217 |
0.391 |
4 |
| S. Africa |
unknown |
2 |
1.000 |
0.000 |
1 |
1.000 |
NA |
0 |
| S. Africa |
wide ranging |
47 |
0.984 |
0.081 |
31 |
0.595 |
0.475 |
1 |
| Sweden |
restricted |
71 |
0.708 |
0.292 |
52 |
0.077 |
0.212 |
6 |
| Sweden |
unknown |
2 |
1.000 |
0.000 |
2 |
0.000 |
0.000 |
0 |
| Sweden |
wide ranging |
47 |
0.871 |
0.204 |
27 |
0.426 |
0.411 |
14 |
| US |
restricted |
89 |
0.813 |
0.243 |
58 |
0.378 |
0.420 |
3 |
| US |
unknown |
0 |
NaN |
NA |
0 |
NaN |
NA |
0 |
| US |
wide ranging |
28 |
0.735 |
0.244 |
17 |
0.339 |
0.407 |
3 |
Simplified figures and basic stats for graphical summary and policy
brief
How many species and pops:
How many species:
nrow(indicators_averaged_one)
## [1] 909
How many populations, including all pops from species that were
assessed more than once:
nrow(ind1_data)
## [1] 5049
How many populations, counting only once populations from taxa
assessed more than once:
# This looks for the id of the taxa already keeping only 1 for the multiassessed taxa, and keeps those int he ind1_data (where the pops data is)
x<-ind1_data[ind1_data$X_uuid %in% indicators_averaged_one$X_uuid, ]
# the number of rows is the number of pops counting only once multiassessed taxa
nrow(x)
## [1] 4707
Create new variables with simplified taxonomic groups
Animals, plants, others
# Define the grouping map
grouping_map <- c(
"amphibian", "bird", "fish", "invertebrate", "mammal",
"angiosperm", "gymnosperm", "reptile", "pteridophytes", "others"
)
# Create a new variable taxonomic_group_3
indicators_averaged_one <- indicators_averaged_one %>%
mutate(
taxonomic_group_3 = case_when(
taxonomic_group_simplified %in% grouping_map[1:5] ~ "animals",
taxonomic_group_simplified %in% grouping_map[6:9] ~ "plants",
taxonomic_group_simplified %in% grouping_map[10] ~ "others",
TRUE ~ NA_character_
)
)
# reorder levels
indicators_averaged_one$taxonomic_group_3<- factor(indicators_averaged_one$taxonomic_group_3,
levels=c("animals", "plants", "others"))
Histogram for Ne > 500 indicator
By animals, plants, others:
# Create a histogram
hist_p <- indicators_averaged_one %>%
ggplot(aes(x = indicator1_mean, fill = taxonomic_group_3)) +
geom_histogram( bins = 25, color="white") + # Adjust the number of bins as needed
labs(x = "Proportion of populations with Ne>500", y = "Frequency") +
scale_fill_manual(
values = grouped_taxon_colors, # Custom colors for animals, plants, and others
breaks = c("animals", "plants", "others"),
name = "Taxonomic Group")+
theme_light() +
theme(panel.border = element_blank(), text = element_text(size = 15),
legend.position = "bottom") +
guides(fill = guide_legend(title = NULL))
# plot
hist_p
## Warning: Removed 348 rows containing non-finite values (`stat_bin()`).

Plain histogram:
# Create a histogram
hist_p <- indicators_averaged_one %>%
ggplot(aes(x = indicator1_mean)) +
geom_histogram( bins = 25, fill="grey30") + # Adjust the number of bins as needed
labs(x = "Proportion of populations with Ne>500", y = "Frequency") +
theme_light() +
theme(panel.border = element_blank(), text = element_text(size = 15)) +
guides(fill = guide_legend(title = NULL))
# plot
hist_p
## Warning: Removed 348 rows containing non-finite values (`stat_bin()`).

Summary table:
Summary table for Ne > 500 indicator
x <- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
filter(!is.na(taxonomic_group_3)) %>%
group_by(taxonomic_group_3) %>%
summarize(n=n(),
mean=mean(indicator1_mean),
median=median(indicator1_mean),
per.0=sum(indicator1_mean==0) / n *100,
per.below.25=sum(indicator1_mean<0.25) / n *100,
per.below.90=sum(indicator1_mean<0.90) / n *100,
per.above.75=sum(indicator1_mean>0.75)/ n *100,
per1=sum(indicator1_mean==1) / n *100)
# Calculate total counts and means
total_counts <- indicators_averaged_one %>%
filter(!is.na(indicator1_mean)) %>%
filter(!is.na(taxonomic_group_3)) %>%
ungroup() %>%
summarize(taxonomic_group_3 = "ALL",
n= n(),
mean = mean(indicator1_mean),
median = median(indicator1_mean),
per.0=sum(indicator1_mean==0) / n *100,
per.below.25=sum(indicator1_mean<0.25) / n *100,
per.below.90=sum(indicator1_mean<0.90) / n *100,
per.above.75=sum(indicator1_mean>0.75)/ n *100,
per1=sum(indicator1_mean==1) / n *100)
# Bind the total row to the summary_table
summary_table <- bind_rows(x, total_counts)
# keep taxonomic groups as level in desired order:
summary_table$taxonomic_group_3<-factor(summary_table$taxonomic_group_3,
levels = c("animals", "plants", "others", "ALL"))
summary_table<- summary_table %>% arrange(taxonomic_group_3)
kable(summary_table, digits=2)
| animals |
308 |
0.34 |
0 |
54.22 |
60.06 |
73.38 |
26.95 |
26.62 |
| plants |
243 |
0.19 |
0 |
61.73 |
73.25 |
90.12 |
10.70 |
9.88 |
| others |
10 |
0.15 |
0 |
80.00 |
80.00 |
90.00 |
10.00 |
10.00 |
| ALL |
561 |
0.27 |
0 |
57.93 |
66.13 |
80.93 |
19.61 |
19.07 |
Histogram for Proportion Mantained populations
Histogram for animal, plants, others:
# Create a histogram
hist_p <- indicators_averaged_one %>%
ggplot(aes(x = indicator2_mean, fill = taxonomic_group_3)) +
geom_histogram(bins = 25, color="white") + # Adjust the number of bins as needed
labs(x = "Proportion of maintained populations", y = "Frequency") +
scale_fill_manual(
values = grouped_taxon_colors, # Custom colors for animals, plants, and others
breaks = c("animals", "plants", "others"),
name = "Taxonomic Group")+
theme_light() +
theme(panel.border = element_blank(), text = element_text(size = 15)) +
guides(fill = guide_legend(title = NULL))
# plot
hist_p
## Warning: Removed 363 rows containing non-finite values (`stat_bin()`).

Plain histogram
# Create a histogram
hist_p <- indicators_averaged_one %>%
ggplot(aes(x = indicator2_mean)) +
geom_histogram(bins = 25, fill="grey30") + # Adjust the number of bins as needed
labs(x = "Proportion of maintained populations", y = "Frequency") +
theme_light() +
theme(panel.border = element_blank(), text = element_text(size = 15)) +
guides(fill = guide_legend(title = NULL))
# plot
hist_p
## Warning: Removed 363 rows containing non-finite values (`stat_bin()`).

Summary table:
Summary table for Proportion Mantaiend populations
x <- indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
filter(!is.na(taxonomic_group_3)) %>%
group_by(taxonomic_group_3) %>%
summarize(n=n(),
mean=mean(indicator2_mean),
median=median(indicator2_mean),
per0=sum(indicator2_mean==0) / n *100,
per.below.25=sum(indicator2_mean<0.25) / n *100,
per.below.90=sum(indicator2_mean<0.90) / n *100,
per.above.75=sum(indicator2_mean>0.75) / n *100,
per1=sum(indicator2_mean==1) / n *100)
# Calculate total counts and means
total_counts <- indicators_averaged_one %>%
filter(!is.na(indicator2_mean)) %>%
filter(!is.na(taxonomic_group_3)) %>%
ungroup() %>%
summarize(taxonomic_group_3 = "ALL",
n= n(),
mean = mean(indicator2_mean),
median = median(indicator2_mean),
per0=sum(indicator2_mean==0) / n *100,
per.below.25=sum(indicator2_mean<0.25) / n *100,
per.below.90=sum(indicator2_mean<0.90) / n *100,
per.above.75=sum(indicator2_mean>0.75) / n *100,
per1=sum(indicator2_mean==1) / n *100)
# Bind the total row to the summary_table
summary_table <- bind_rows(x, total_counts)
# keep taxonomic groups as level in desired order:
summary_table$taxonomic_group_3<-factor(summary_table$taxonomic_group_3,
levels = c("animals", "plants", "others", "ALL"))
summary_table<- summary_table %>% arrange(taxonomic_group_3)
kable(summary_table, digits=2)
| animals |
326 |
0.82 |
1.00 |
0.61 |
3.37 |
39.57 |
68.1 |
56.44 |
| plants |
200 |
0.86 |
1.00 |
0.50 |
2.00 |
35.50 |
75.5 |
58.00 |
| others |
20 |
0.83 |
0.89 |
0.00 |
0.00 |
50.00 |
65.0 |
30.00 |
| ALL |
546 |
0.83 |
1.00 |
0.55 |
2.75 |
38.46 |
70.7 |
56.04 |
Data availability donuts and plot bars
Species level yes/no. Donut.
df<- metadata %>%
filter(popsize_data != "data_for_species") %>% # we want to show only data for pops or insufficient
group_by(popsize_data) %>%
summarise(n=n(),
percentage = (n / nrow(metadata)) * 100)
kable(df, digits = 0)
| insuff_data_species |
216 |
23 |
| yes |
611 |
64 |
# variable to make change the size of the hole
hsize <- 2 # to change the size of the hole. larger=bigger
df <- df %>%
mutate(x = hsize)
# donut plot
p <- ggplot(df, aes(x = hsize, y = n, fill = popsize_data)) +
geom_col() +
coord_polar(theta = "y") +
scale_fill_manual(values=c("#2ca02c", "grey80"),
breaks=c("yes", "insuff_data_species"),
labels=c("Population level", "Insufficient data")) +
xlim(c(0.2, hsize + 0.5)) + theme_void()
p

Species level yes/no. Bar plot
metadata %>%
filter(popsize_data != "data_for_species") %>% # we want to show only data for pops or insufficient
ggplot(aes(x=country_assessment, fill = popsize_data)) +
geom_bar(position = "fill", color="white") +
scale_fill_manual(values=c("#2ca02c", "grey80"),
breaks=c("yes", "insuff_data_species"),
labels=c("Population level", "Insufficient data")) +
scale_x_discrete(limits=rev) + xlab("") + ylab("Data availability (% of species)") +
coord_flip() +
theme_light()

Population level, what kind?
# we first need the column numbers
df<-ind1_data %>%
mutate(Ne_calculated_from = replace_na(Ne_calculated_from, "no data available")) %>%
group_by(Ne_calculated_from) %>%
summarise(n=n(),
percentage = (n / nrow(ind1_data)) * 100)
kable(df, digits = 0)
| genetic data |
349 |
7 |
| NcPoint ratio |
1147 |
23 |
| NcRange ratio |
2562 |
51 |
| no data available |
991 |
20 |
# variable to make change the size of the hole
hsize <- 3 # to change the size of the hole. larger=bigger
df <- df %>%
mutate(x = hsize)
# donut plot
p <- ggplot(df, aes(x = hsize, y = n, fill = Ne_calculated_from)) +
geom_col() +
coord_polar(theta = "y") +
scale_fill_manual(labels=c("genetic data", "NcPoint ratio", "NcRange ratio", "no data available"),
breaks=c("genetic data", "NcPoint ratio", "NcRange ratio", "no data available"),
values=c("darkgreen", "#0072B2", "#E69F00", "grey80")) +
xlim(c(0.2, hsize + 0.5)) + theme_void()
p

Session Info for reproducibility purposes:
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggnewscale_0.4.9 glmmTMB_1.1.7 knitr_1.39 lme4_1.1-31
## [5] Matrix_1.5-3 cowplot_1.1.1 viridis_0.6.3 viridisLite_0.4.0
## [9] alluvial_0.1-2 ggsankey_0.0.99999 ggplot2_3.4.1 stringr_1.4.0
## [13] utile.tools_0.2.7 readr_2.1.2 dplyr_1.0.9 tidyr_1.2.0
##
## loaded via a namespace (and not attached):
## [1] TMB_1.9.6 tidyselect_1.1.2 xfun_0.31
## [4] bslib_0.3.1 purrr_0.3.4 splines_4.2.1
## [7] lattice_0.20-45 colorspace_2.0-3 vctrs_0.5.2
## [10] generics_0.1.3 htmltools_0.5.5 yaml_2.3.5
## [13] utf8_1.2.2 rlang_1.0.6 nloptr_2.0.3
## [16] jquerylib_0.1.4 pillar_1.7.0 glue_1.6.2
## [19] withr_2.5.0 DBI_1.1.3 lifecycle_1.0.3
## [22] munsell_0.5.0 gtable_0.3.0 evaluate_0.15
## [25] labeling_0.4.2 tzdb_0.3.0 fastmap_1.1.0
## [28] fansi_1.0.3 highr_0.9 Rcpp_1.0.10
## [31] scales_1.2.0 jsonlite_1.8.0 farver_2.1.1
## [34] gridExtra_2.3 hms_1.1.1 digest_0.6.29
## [37] stringi_1.7.6 numDeriv_2016.8-1.1 grid_4.2.1
## [40] cli_3.6.0 tools_4.2.1 magrittr_2.0.3
## [43] sass_0.4.1 tibble_3.1.7 crayon_1.5.1
## [46] pkgconfig_2.0.3 ellipsis_0.3.2 MASS_7.3-57
## [49] minqa_1.2.5 assertthat_0.2.1 rmarkdown_2.14
## [52] rstudioapi_0.13 boot_1.3-28 R6_2.5.1
## [55] nlme_3.1-157 compiler_4.2.1